From 006aaa64e166f656a21ce115f010e8f4fb93f8a8 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 17 Sep 2025 10:41:14 +0100 Subject: [PATCH] Updated benchmarks --- ...K_XL-00001-of-00002__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- ...r-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log | 6 +- ...Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log | 6 +- ..._K_XL-00001-of-00002__rocm6_4_3__hblt0.log | 8 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 8 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...ir-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 6 +- ...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 6 +- ...4_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 8 +- ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +- ...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 6 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...K_XL-00001-of-00003__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00003__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00003__rocm6_4_3-rocwmma__hblt0.log | 2 +- ...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 2 +- ...r-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log | 6 +- ...Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log | 8 +- ..._K_XL-00001-of-00003__rocm6_4_3__hblt0.log | 2 +- ...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 8 +- ...-00003__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...3__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...ir-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log | 6 +- ...-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log | 6 +- ...6_K_XL-00001-of-00003__rocm7_rc__hblt0.log | 6 +- ...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 8 +- ...-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log | 6 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...UD-Q6_K_XL-00001-of-00003__vulkan_radv.log | 6 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...K_XL-00001-of-00002__rocm6_4_3-rocwmma.log | 8 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 2 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 2 +- ...t-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log | 4 +- ...Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log | 4 +- ..._K_XL-00001-of-00002__rocm6_4_3__hblt0.log | 2 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 2 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 4 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 4 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...ct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log | 4 +- ...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 8 +- ...8_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 4 +- ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +- ...-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...UD-Q8_K_XL-00001-of-00002__vulkan_radv.log | 4 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 4 +- ...Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log | 8 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 8 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 2 +- ...nstruct-Q6_K-00001-of-00002__rocm6_4_3.log | 8 +- ...ct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log | 8 +- ...-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log | 2 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 2 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log | 8 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 8 +- ...Instruct-Q6_K-00001-of-00002__rocm7_rc.log | 8 +- ...uct-Q6_K-00001-of-00002__rocm7_rc__fa1.log | 2 +- ...t-Q6_K-00001-of-00002__rocm7_rc__hblt0.log | 6 +- ...K-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +- ...uct-Q6_K-00001-of-00002__vulkan_amdvlk.log | 6 +- ...6_K-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...truct-Q6_K-00001-of-00002__vulkan_radv.log | 6 +- ...-Q6_K-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00003__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00003__rocm6_4_3-rocwmma__hblt0.log | 2 +- ...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 8 +- ...nstruct-Q8_0-00001-of-00003__rocm6_4_3.log | 8 +- ...ct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log | 8 +- ...-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log | 2 +- ...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 2 +- ...-00003__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...3__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...Instruct-Q8_0-00001-of-00003__rocm7_rc.log | 6 +- ...uct-Q8_0-00001-of-00003__rocm7_rc__fa1.log | 2 +- ...t-Q8_0-00001-of-00003__rocm7_rc__hblt0.log | 6 +- ...0-00001-of-00003__rocm7_rc__hblt0__fa1.log | 2 +- ...uct-Q8_0-00001-of-00003__vulkan_amdvlk.log | 6 +- ...8_0-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...truct-Q8_0-00001-of-00003__vulkan_radv.log | 6 +- ...-Q8_0-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...K_XL-00001-of-00002__rocm6_4_3-rocwmma.log | 8 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- ...t-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log | 8 +- ...Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log | 6 +- ..._K_XL-00001-of-00002__rocm6_4_3__hblt0.log | 8 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 8 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...ct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 8 +- ...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 2 +- ...4_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 6 +- ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 6 +- ...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 6 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...K_XL-00001-of-00003__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00003__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00003__rocm6_4_3-rocwmma__hblt0.log | 2 +- ...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 8 +- ...7-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log | 6 +- ...Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log | 8 +- ..._K_XL-00001-of-00003__rocm6_4_3__hblt0.log | 8 +- ...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 8 +- ...-00003__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...3__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 8 +- ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...07-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log | 6 +- ...-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log | 9 +- ...3_K_XL-00001-of-00003__rocm7_rc__hblt0.log | 6 +- ...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 2 +- ...-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log | 6 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...UD-Q3_K_XL-00001-of-00003__vulkan_radv.log | 6 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...BF16-00001-of-00002__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- ...30B-A3B-BF16-00001-of-00002__rocm6_4_3.log | 6 +- ...3B-BF16-00001-of-00002__rocm6_4_3__fa1.log | 6 +- ...-BF16-00001-of-00002__rocm6_4_3__hblt0.log | 6 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...-BF16-00001-of-00002__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...-30B-A3B-BF16-00001-of-00002__rocm7_rc.log | 6 +- ...A3B-BF16-00001-of-00002__rocm7_rc__fa1.log | 8 +- ...B-BF16-00001-of-00002__rocm7_rc__hblt0.log | 6 +- ...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +- ...A3B-BF16-00001-of-00002__vulkan_amdvlk.log | 6 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...B-A3B-BF16-00001-of-00002__vulkan_radv.log | 6 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...uct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log | 6 +- ...507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log | 6 +- ...7-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- ...3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log | 6 +- ...struct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log | 6 +- ...ruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log | 6 +- ...2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log | 6 +- ...6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...L__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...ruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log | 6 +- ...2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log | 6 +- ...07-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log | 6 +- ...-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log | 6 +- ...nstruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log | 6 +- ...truct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log | 6 +- ...-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log | 6 +- ...nstruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log | 6 +- ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 6 +- ...-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log | 6 +- ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 6 +- ...3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log | 6 +- ...-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log | 6 +- ...t-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log | 6 +- ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log | 6 +- ...-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log | 6 +- ...b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log | 6 +- ...8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...L__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log | 6 +- ...b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log | 6 +- ...it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log | 6 +- ...-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log | 6 +- ...mma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log | 6 +- ...a-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log | 6 +- ...2b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log | 6 +- ...mma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log | 6 +- ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 6 +- ...gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log | 6 +- ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 6 +- ...BF16-00001-of-00002__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00002__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- ...-27b-it-BF16-00001-of-00002__rocm6_4_3.log | 6 +- ...it-BF16-00001-of-00002__rocm6_4_3__fa1.log | 6 +- ...-BF16-00001-of-00002__rocm6_4_3__hblt0.log | 6 +- ...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +- ...-00002__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...-BF16-00001-of-00002__rocm7_rc-rocwmma.log | 4 +- ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 4 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...3-27b-it-BF16-00001-of-00002__rocm7_rc.log | 6 +- ...-it-BF16-00001-of-00002__rocm7_rc__fa1.log | 6 +- ...t-BF16-00001-of-00002__rocm7_rc__hblt0.log | 4 +- ...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 6 +- ...7b-it-BF16-00001-of-00002__vulkan_radv.log | 6 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...emma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log | 6 +- ...3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log | 6 +- ...4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm6_4_3.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log | 6 +- ...gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log | 6 +- ...-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log | 6 +- ...Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...S__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log | 6 +- ...-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log | 6 +- ...-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log | 6 +- ...t-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm7_rc.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log | 6 +- ...a-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log | 6 +- .../gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log | 6 +- ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 6 +- .../gemma-3-4b-it-Q3_K_S__vulkan_radv.log | 6 +- ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 6 +- .../gpt-oss-120b-F16__rocm6_4_3-rocwmma.log | 8 +- ...t-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log | 6 +- ...oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...20b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-120b-F16__rocm6_4_3.log | 6 +- .../gpt-oss-120b-F16__rocm6_4_3__fa1.log | 6 +- .../gpt-oss-120b-F16__rocm6_4_3__hblt0.log | 8 +- ...pt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log | 6 +- ...0b-F16__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...6__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + .../gpt-oss-120b-F16__rocm7_rc-rocwmma.log | 6 +- ...pt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log | 6 +- ...-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log | 6 +- ...120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-120b-F16__rocm7_rc.log | 6 +- .../gpt-oss-120b-F16__rocm7_rc__fa1.log | 6 +- .../gpt-oss-120b-F16__rocm7_rc__hblt0.log | 6 +- ...gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log | 6 +- .../gpt-oss-120b-F16__vulkan_amdvlk.log | 6 +- .../gpt-oss-120b-F16__vulkan_amdvlk__fa1.log | 6 +- .../results/gpt-oss-120b-F16__vulkan_radv.log | 6 +- .../gpt-oss-120b-F16__vulkan_radv__fa1.log | 6 +- ...xfp4-00001-of-00003__rocm6_4_3-rocwmma.log | 6 +- ...00001-of-00003__rocm6_4_3-rocwmma__fa1.log | 6 +- ...001-of-00003__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 2 +- ...s-120b-mxfp4-00001-of-00003__rocm6_4_3.log | 6 +- ...b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log | 6 +- ...mxfp4-00001-of-00003__rocm6_4_3__hblt0.log | 6 +- ...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 6 +- ...-00003__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...3__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + ...mxfp4-00001-of-00003__rocm7_rc-rocwmma.log | 6 +- ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 6 +- ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- ...ss-120b-mxfp4-00001-of-00003__rocm7_rc.log | 6 +- ...0b-mxfp4-00001-of-00003__rocm7_rc__fa1.log | 6 +- ...-mxfp4-00001-of-00003__rocm7_rc__hblt0.log | 6 +- ...4-00001-of-00003__rocm7_rc__hblt0__fa1.log | 6 +- ...0b-mxfp4-00001-of-00003__vulkan_amdvlk.log | 6 +- ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...120b-mxfp4-00001-of-00003__vulkan_radv.log | 6 +- ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 6 +- .../gpt-oss-20b-F32__rocm6_4_3-rocwmma.log | 6 +- ...pt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log | 6 +- ...-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-20b-F32__rocm6_4_3.log | 6 +- .../gpt-oss-20b-F32__rocm6_4_3__fa1.log | 6 +- .../gpt-oss-20b-F32__rocm6_4_3__hblt0.log | 6 +- ...gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log | 6 +- ...0b-F32__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...2__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + .../gpt-oss-20b-F32__rocm7_rc-rocwmma.log | 6 +- ...gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log | 6 +- ...t-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log | 6 +- ...-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-20b-F32__rocm7_rc.log | 6 +- .../gpt-oss-20b-F32__rocm7_rc__fa1.log | 6 +- .../gpt-oss-20b-F32__rocm7_rc__hblt0.log | 6 +- .../gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log | 6 +- .../gpt-oss-20b-F32__vulkan_amdvlk.log | 6 +- .../gpt-oss-20b-F32__vulkan_amdvlk__fa1.log | 6 +- .../results/gpt-oss-20b-F32__vulkan_radv.log | 6 +- .../gpt-oss-20b-F32__vulkan_radv__fa1.log | 6 +- .../gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log | 6 +- ...-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log | 6 +- ...ss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-20b-mxfp4__rocm6_4_3.log | 6 +- .../gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log | 6 +- .../gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log | 6 +- ...t-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log | 6 +- ...-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...4__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + .../gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log | 6 +- ...t-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log | 6 +- ...oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log | 6 +- ...0b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../results/gpt-oss-20b-mxfp4__rocm7_rc.log | 6 +- .../gpt-oss-20b-mxfp4__rocm7_rc__fa1.log | 6 +- .../gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log | 6 +- ...pt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_radv.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 6 +- .../llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log | 6 +- ...lama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log | 6 +- ...ma-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log | 6 +- ...7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +- .../results/llama-2-7b.Q4_0__rocm6_4_3.log | 6 +- .../llama-2-7b.Q4_0__rocm6_4_3__fa1.log | 6 +- .../llama-2-7b.Q4_0__rocm6_4_3__hblt0.log | 6 +- ...llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log | 6 +- ...b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log | 10 + ...0__rocm7_rc-rocwmma-fa_all_quants__fa1.log | 10 + ..._rocm7_rc-rocwmma-fa_all_quants__hblt0.log | 10 + ...7_rc-rocwmma-fa_all_quants__hblt0__fa1.log | 10 + .../llama-2-7b.Q4_0__rocm7_rc-rocwmma.log | 6 +- ...llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log | 6 +- ...ama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log | 6 +- ...-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +- .../results/llama-2-7b.Q4_0__rocm7_rc.log | 6 +- .../llama-2-7b.Q4_0__rocm7_rc__fa1.log | 6 +- .../llama-2-7b.Q4_0__rocm7_rc__hblt0.log | 6 +- .../llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log | 6 +- .../llama-2-7b.Q4_0__vulkan_amdvlk.log | 6 +- .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 6 +- .../results/llama-2-7b.Q4_0__vulkan_radv.log | 6 +- .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 6 +- docs/index.html | 106 +- docs/results.json | 20818 +++++++++------- 408 files changed, 14136 insertions(+), 9467 deletions(-) create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log index 7465f25..3d2d9c2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.62 ± 0.10 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.95 ± 0.02 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 125.93 ± 0.26 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index f8e8b6b..47aebe3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 135.10 ± 0.35 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.14 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 135.40 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.69 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index fad8a13..ad3bc31 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 130.99 ± 0.36 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 132.28 ± 0.14 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.50 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index d4132eb..f3696a4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.15 ± 0.41 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.15 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.86 ± 0.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.70 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log index b84b584..cba7aea 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.66 ± 0.22 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 125.92 ± 0.27 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log index 350aa44..3f6be05 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 100.20 ± 0.13 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.30 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.12 ± 0.59 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log index ec3889b..b894fe4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2624d340) reason :GPU Hang -✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 131.45 ± 0.35 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 2d21418..ffffffb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x37c5d340) on address 0x7f2e3516f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.26 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..d34b8a5 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.56 ± 0.11 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.90 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..638b42c --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 127.25 ± 0.57 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..6310d52 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.69 ± 0.57 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.56 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..8605e6d --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 169.19 ± 0.12 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index d99dfb8..e2005d4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.48 ± 0.53 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.71 ± 0.12 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index b74a931..f579cee 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.27 ± 0.47 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.86 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.97 ± 0.54 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.70 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 906c8ff..38f5f9b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 158.54 ± 0.42 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.39 ± 0.34 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.56 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index d51baac..f4788eb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 166.11 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.83 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 169.35 ± 0.56 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.65 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 3c47854..bf5b0b4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 89.60 ± 0.20 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.22 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.73 ± 0.22 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.47 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index bf63f0d..7d4fe91 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 64.66 ± 0.16 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.35 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 93.27 ± 0.18 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index a3b2c08..6afa9b4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1d380ea0) reason :GPU Hang -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 159.89 ± 0.44 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.55 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 9cc7929..543674d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x4a0fea0) on address 0x7f3bf796f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.42 ± 0.33 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index d8fa4d5..426e6cc 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 197.95 ± 0.29 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.24 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 217.22 ± 0.49 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.18 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index eece528..9a614f0 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 199.40 ± 0.35 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.26 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 219.61 ± 0.55 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.21 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 5b8bc47..5471167 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.28 ± 0.17 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.33 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 212.60 ± 0.74 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.18 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 41d8077..9353954 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.64 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.88 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 224.85 ± 2.55 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.64 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log index f5f0ad3..5dfa73b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.82 ± 0.35 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.59 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.87 ± 0.23 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.86 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 2bfbda7..d31d01b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.60 ± 0.30 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.62 ± 0.04 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.65 ± 0.59 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index 159477b..a89a06c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x26e36340) on address 0x7fcef3635000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0xe6e7340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index f625092..4115027 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x35263340) reason :GPU Hang +Memory access fault by GPU node-1 (Agent handle: 0x400a9340) on address 0x7ef17b435000. Reason: Page not present or supervisor privilege. ✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log index 8ac4440..b7bc1fd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.95 ± 0.30 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.65 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.53 ± 0.28 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.87 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log index 581bc16..d9d9b0b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x28aa3340) on address 0x7fb93761b000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.22 ± 0.41 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.95 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log index 486bf8f..b47fc74 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x14d05340) reason :GPU Hang +Memory access fault by GPU node-1 (Agent handle: 0x22558310) on address 0x7f7830fad000. Reason: Page not present or supervisor privilege. ✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log index fb08717..d51b566 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x265e8340) reason :GPU Hang -✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.68 ± 0.22 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..b0e83f6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 91.95 ± 0.25 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.76 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..252ee22 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 70.00 ± 0.17 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.98 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..2210c41 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 134.22 ± 0.50 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.90 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..07268a4 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 159.75 ± 0.33 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index 8ca5c1e..96555fb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.19 ± 0.20 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.64 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 92.18 ± 0.04 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.92 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 6eaa1df..e3f3b88 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 114.61 ± 0.20 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.51 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 121.75 ± 0.32 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.97 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 5fb6167..ebfc2f2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.88 ± 0.92 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.61 ± 0.09 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 151.32 ± 0.45 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.90 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index ec552ad..c57d72f 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 150.07 ± 0.56 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.52 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 161.10 ± 0.36 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log index dc5e6ec..3f154b9 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.52 ± 0.17 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.63 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 92.20 ± 0.11 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.85 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log index c1980f8..068b285 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 74.02 ± 0.13 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.73 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 71.02 ± 0.16 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 350f64d..88bcc9c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.67 ± 0.75 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.68 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 147.32 ± 0.43 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.91 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 6a1fdfd..928c3ad 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1c536ea0) on address 0x7f623b57e000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 161.37 ± 0.36 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log index 1a18e9c..a8b6d69 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 219.81 ± 0.70 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.80 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 264.50 ± 0.99 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.27 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index bc34d0a..740e1fd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 222.20 ± 0.63 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 16.82 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 267.86 ± 1.22 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.28 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log index 1c621cf..9f963a8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.55 ± 0.40 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.07 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 208.01 ± 0.73 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.49 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index 3dbeebf..c1968b9 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.25 ± 0.50 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.31 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 221.63 ± 1.26 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.71 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log index 769f2f3..07addc3 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -7,9 +7,5 @@ This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASL rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.02 ± 0.18 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x284c3340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 4451e68..cbc1a68 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.83 ± 0.11 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.82 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 2553f31..635e8f4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x21da1340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x7166340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index 1a88dcd..8ab83e5 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x15ac2340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x37f0e340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log index 56497ee..ed7de0e 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.13 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 94.79 ± 0.14 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log index 08a5922..ab92419 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 80.42 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.62 ± 0.08 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log index f5b7147..b2280bc 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2c1e5340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x12cee310) reason :GPU Hang ✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 013fc7c..d7451a9 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3e536340) on address 0x7f9182f6f000. Reason: Page not present or supervisor privilege. +Memory access fault by GPU node-1 (Agent handle: 0x367c310) on address 0x7fc07ad93000. Reason: Page not present or supervisor privilege. ✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..4f142bc --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.15 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..7ec867a --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.79 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..102a571 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.89 ± 0.22 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..5d3aace --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.53 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 62ab0f7..896a9c7 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.31 ± 0.20 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.42 ± 0.12 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 1e31a60..a3dbd8f 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 100.85 ± 0.13 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.56 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index c5612f6..9ead8d1 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.00 ± 0.22 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 92.02 ± 0.17 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index e557123..80ec16d 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.88 ± 0.09 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.10 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log index 7c6d5fd..fb7b0d0 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 99.41 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 95.12 ± 0.17 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index f65845d..8390da3 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1f66bec0) on address 0x7f3e84b6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.16 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log index 44b15c4..f0129d8 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 94.06 ± 0.09 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.86 ± 0.18 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 970522b..23bf2f4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xac09ec0) on address 0x7f283f56f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 95.87 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log index 13252f9..114cbeb 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 98.03 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 97.72 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.81 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 5781898..4d303d4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 99.12 ± 0.25 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 99.04 ± 0.31 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log index 9500e62..dcad22c 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 75.59 ± 0.28 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 78.94 ± 0.51 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 6c47ac0..d7b8226 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 80.09 ± 0.38 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 80.90 ± 0.77 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log index ebcf552..1a327e9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log @@ -7,5 +7,9 @@ This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASL rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -Memory access fault by GPU node-1 (Agent handle: 0x1a840340) on address 0x7f3babb56000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 265.76 ± 0.95 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index ab25429..eec534a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.08 ± 1.26 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.53 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 289.14 ± 1.57 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.64 ± 0.15 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 65957d1..3fc5e97 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 134.19 ± 1.49 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.56 ± 0.01 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x24187340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index dcff3bd..b5d2530 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1de78340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x3da9340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log index f75714d..ba3f44f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log @@ -7,9 +7,5 @@ This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASL rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.28 ± 1.29 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.03 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x11bc3310) reason :GPU Hang +✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log index 7c8dc76..772e891 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2162b340) on address 0x7f500556f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.67 ± 0.91 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log index 96e7a94..b338079 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xdacf340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x8a0a310) reason :GPU Hang ✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 65a2e33..d72a6cb 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3dc00340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x1ada6310) reason :GPU Hang ✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..334b616 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.44 ± 1.46 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.55 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..632535a --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.67 ± 1.04 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..8fca923 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.88 ± 1.14 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..9bba0b6 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 284.81 ± 1.55 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.72 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log index 64b52d5..3e4a420 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3882bf60) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.13 ± 0.84 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.71 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 11cbda1..44047fc 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.84 ± 9.41 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.37 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.92 ± 2.63 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 7575a7e..52dda2f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.97 ± 1.67 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.05 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.23 ± 1.35 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 647f737..f01372d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.26 ± 1.79 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.33 ± 0.03 | - -build: de219279 (6181) +HW Exception by GPU node-1 (Agent handle: 0x13c5d180) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log index 6ac1b55..f79d354 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.37 ± 1.65 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.04 | - -build: de219279 (6181) +Memory access fault by GPU node-1 (Agent handle: 0x381db160) on address 0x7f72baf68000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log index bc46574..d2e6f1c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xa893ec0) on address 0x7f070a3a9000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x34902180) reason :GPU Hang ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log index ca30067..ff5e1de 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 269.17 ± 0.99 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.63 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.52 ± 1.78 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log index 56c35f6..e29a4b0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1db86ec0) on address 0x7f2273f6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 287.04 ± 1.92 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log index 8c8e292..e235c42 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 242.07 ± 1.05 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.56 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 224.02 ± 2.86 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.98 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log index 71556fe..9e0f45f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 244.49 ± 1.13 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.33 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 234.30 ± 1.10 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.75 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log index 33a8b80..eff084d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 147.08 ± 0.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.50 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 201.49 ± 2.22 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.77 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log index caf7973..ff879bc 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 149.97 ± 1.10 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.49 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 202.49 ± 5.98 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.74 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log index 40cf34f..e0ffee4 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.35 ± 3.39 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.78 ± 0.03 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.44 ± 24.69 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.88 ± 0.05 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 4a19b96..1f9cd7e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.23 ± 3.13 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.73 ± 0.03 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.83 ± 1.59 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.06 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index 65afec2..a4e7e73 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x5f69340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x3265f340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index 98dc8c5..2f00d23 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 140.27 ± 0.97 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.74 ± 0.00 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x33cad340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log index 8ac0514..052bfbb 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log @@ -7,5 +7,9 @@ This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASL rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -HW Exception by GPU node-1 (Agent handle: 0x2079b340) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.49 ± 1.84 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.91 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log index 8d6068b..5571471 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x37ff7340) on address 0x7fa76bba9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.07 ± 2.73 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.06 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log index 2add86a..27d8d9a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2a344340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x1ac74310) reason :GPU Hang ✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log index 977948c..6e6a4d0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x27934340) on address 0x7f656656f000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x390d2310) reason :GPU Hang ✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..7928822 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.21 ± 1.93 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.85 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..1e1c530 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.69 ± 2.25 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.91 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..7b4105e --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 267.51 ± 12.72 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.90 ± 0.04 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..546aac5 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 286.25 ± 4.29 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.90 ± 0.05 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log index 03112eb..42ba624 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.13 ± 2.90 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.07 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 260.60 ± 10.80 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.82 ± 0.22 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 182bf9d..a58997b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.60 ± 3.84 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.62 ± 0.02 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.26 ± 3.75 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.92 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 66c4a41..2b05052 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.02 ± 2.74 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.06 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.91 ± 1.81 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.91 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 5439b6b..e5a0196 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 279.69 ± 2.30 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.60 ± 0.04 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 260.83 ± 5.18 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.82 ± 0.20 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log index f3af1c9..268c6ee 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 252.38 ± 7.70 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.35 ± 0.60 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.56 ± 3.76 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.88 ± 0.02 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log index 9932c99..a877428 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2e56aec0) on address 0x7f4102f6f000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x30007180) reason :GPU Hang ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log index d31f283..b6e5310 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 271.54 ± 4.10 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.57 ± 0.58 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 278.61 ± 2.47 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log index 986706b..19dcf24 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1aa83ec0) on address 0x7f9f1e96f000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x231e9180) reason :GPU Hang ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log index 089be05..5436987 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 258.54 ± 1.39 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 343.36 ± 1.37 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.57 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log index 42f4672..1755f19 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 262.84 ± 1.39 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.30 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 347.56 ± 1.15 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.42 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log index e7df5fa..0d356f0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 169.23 ± 0.84 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 244.52 ± 1.08 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.57 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log index 2776458..be8e1fa 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 173.79 ± 0.85 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 253.13 ± 1.36 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.56 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log index f500cd5..fb8f81b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log @@ -7,9 +7,5 @@ This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASL rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.51 ± 1.64 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.01 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x1f49a340) reason :GPU Hang +✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 85f9bfb..ca1ff97 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 308.62 ± 2.62 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.54 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 307.79 ± 3.48 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 78e4255..a0f2615 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.71 ± 0.62 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.71 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 138.51 ± 0.72 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index 8945a72..1e52153 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 142.62 ± 0.82 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.55 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 142.41 ± 0.57 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.80 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log index 5aa96bf..1039808 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log @@ -2,14 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found +hipBLASLt error: Heuristic Fetch Failed! This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.37 ± 1.44 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 282.50 ± 1.23 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log index 98b05eb..720ab4a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 234.68 ± 1.31 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.71 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.89 ± 1.35 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.82 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log index 186cdf8..91a8494 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xa3c7340) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.98 ± 0.67 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log index 19b75a4..7b80e05 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x118c6340) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 143.18 ± 0.45 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..86b7922 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.86 ± 1.18 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.71 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..562f9a8 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.13 ± 2.14 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.77 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..5230084 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.66 ± 1.37 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..8db788a --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.00 ± 1.51 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.82 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 54d6795..7ce4256 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 290.54 ± 1.59 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.67 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.38 ± 0.76 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 6f81260..82d7419 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 304.99 ± 0.37 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.28 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.40 ± 1.77 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 721ffc5..fe0f034 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.93 ± 1.57 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.65 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.08 ± 2.96 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 241fb21..1ae0137 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.13 ± 1.26 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.27 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 297.71 ± 1.73 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 19895f7..2464692 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.60 ± 1.95 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.73 ± 0.00 | - -build: de219279 (6181) +Memory access fault by GPU node-1 (Agent handle: 0x4092b180) on address 0x7fe1ddb56000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index d607e30..a5a4c8a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x134adec0) on address 0x7f0318984000. Reason: Page not present or supervisor privilege. +Memory access fault by GPU node-1 (Agent handle: 0x2b6cf180) on address 0x7f9694f56000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index a0c7756..a9689cc 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.56 ± 1.41 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.72 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.17 ± 2.14 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index ae6829f..b0472bd 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 227.75 ± 1.52 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.73 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.96 ± 1.85 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index f89d42c..06fe45e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 216.64 ± 2.76 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.39 ± 0.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 191.71 ± 1.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 21.03 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 7ee60d9..9480722 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 217.68 ± 4.15 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 19.97 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 193.39 ± 1.52 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.61 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 002154b..71442c3 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 151.98 ± 0.60 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.26 ± 0.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 213.71 ± 2.99 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.87 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 64d4625..07180ac 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 154.96 ± 0.82 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.28 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 217.08 ± 5.59 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.85 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log index d927ed6..74dae23 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 130.11 ± 0.68 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.95 ± 0.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 129.22 ± 0.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.25 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 4638f26..3f8d6e9 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.31 ± 0.80 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.71 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.48 ± 1.30 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index d8218bd..148c268 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x8063340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xd6b8340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index 3a247e3..131dcc4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x18398340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 75.22 ± 0.16 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.29 ± 0.03 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log index 82caf25..c9464c6 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.78 ± 1.03 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.68 ± 0.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 129.64 ± 0.35 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.24 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log index a0ed178..f654098 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x50aa340) on address 0x7f7365ba9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.82 ± 0.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log index 3767019..64085e5 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1990d340) reason :GPU Hang -✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 74.17 ± 0.11 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.27 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log index f1dc1f5..78154a2 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x180d4340) on address 0x7f11c8f6f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 77.91 ± 0.23 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.31 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..304856f --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.98 ± 0.86 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.14 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..4b9aafa --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.08 ± 0.51 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.29 ± 0.04 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..fdc7289 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 130.87 ± 0.83 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.25 ± 0.05 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..011ab77 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 137.23 ± 0.55 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index 38826b0..bac74c4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3c89ef80) on address 0x7f777640a000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.60 ± 0.56 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.33 ± 0.02 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index a74258c..3ce3c01 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.61 ± 0.92 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.34 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.76 ± 0.58 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.36 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 966ba3e..f3e8a66 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.33 ± 0.68 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.78 ± 0.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.24 ± 0.57 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.03 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 22a4e47..baf0257 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.47 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.03 ± 0.57 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.84 ± 0.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.37 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log index daaa296..818e7e0 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.95 ± 0.76 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.99 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.45 ± 0.50 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log index 0d3acaf..7cfb635 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,6 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -:0:rocdevice.cpp :3675: 29915649820 us: Callback: Queue 0x7f500c700000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 -Memory access fault by GPU node-1 (Agent handle: 0x2aad9ec0) on address 0x7f37c576f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.36 ± 0.02 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 8135bc1..5c4c044 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 135.29 ± 0.51 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.97 ± 0.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.40 ± 0.48 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.04 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index fc4a37f..c4be1e2 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x122f2ec0) on address 0x7f10537a9000. Reason: Page not present or supervisor privilege. +Memory access fault by GPU node-1 (Agent handle: 0x293f4180) on address 0x7f5ee4f70000. Reason: Page not present or supervisor privilege. ✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log index ec978bb..9c8e307 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 112.93 ± 0.63 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.43 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 131.33 ± 1.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.27 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index e560c0b..0d15b29 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 114.35 ± 1.12 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 16.27 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 133.32 ± 1.63 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.12 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log index 5fbf704..4512dd6 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 64.60 ± 0.38 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.03 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 115.77 ± 1.42 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.75 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index e878778..1fdb504 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 66.60 ± 0.42 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.28 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 121.80 ± 1.81 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log index cd9f858..1e65c4b 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 438.42 ± 4.14 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.57 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 426.32 ± 6.04 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.00 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index 3adfafe..f0325ad 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 475.43 ± 7.40 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.08 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 479.22 ± 4.77 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.91 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 96ee7c7..3f88455 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 158.13 ± 2.40 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 158.56 ± 4.20 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.03 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index e330db0..f0a6412 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 163.40 ± 3.21 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.14 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 165.57 ± 2.56 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.89 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log index f983fbc..cff659b 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 441.36 ± 3.35 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.60 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 432.19 ± 6.24 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.02 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log index 04c9b9c..77fe8ec 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 337.36 ± 3.48 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.45 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 477.24 ± 5.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.88 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log index c1ff6cb..501ffea 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 161.73 ± 1.23 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 162.44 ± 4.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.05 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log index aaa3b75..1c49a1a 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 143.05 ± 2.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.42 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 160.17 ± 3.26 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.91 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..11cfce9 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.53 ± 2.47 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.69 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..cdcd330 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 476.36 ± 3.91 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.93 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..85da425 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 330.47 ± 5.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.09 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..b9f0191 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 343.19 ± 4.41 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.90 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log index 7f744f7..02d9e91 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 448.63 ± 5.90 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.96 ± 0.02 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 431.59 ± 5.03 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.06 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 61f996e..7c575f0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 473.34 ± 8.60 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.99 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 476.09 ± 5.36 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.93 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 8b322b7..ca2d22c 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 338.07 ± 3.03 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.93 ± 0.03 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 332.32 ± 3.60 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.11 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index a3675a1..368ff16 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 342.57 ± 3.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.97 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 344.55 ± 3.84 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.92 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log index c7f3224..579c663 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 444.30 ± 6.78 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.66 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 431.29 ± 3.17 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.10 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log index 8384bda..aadc637 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x38fecea0) on address 0x7f31ea76f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 475.35 ± 3.41 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.94 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log index aab676c..9061b2e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 333.42 ± 6.83 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.69 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 329.24 ± 2.98 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.06 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index dba441c..ae585b8 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x1f121ea0) on address 0x7fd78e16f000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 348.53 ± 5.60 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.92 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log index 67aa5d3..9733b38 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 106.47 ± 0.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.18 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 139.51 ± 0.90 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.31 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index de3ec24..89ebf52 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 106.77 ± 0.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.11 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 140.62 ± 1.53 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.26 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log index 7ea35f3..f88d6ea 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 84.71 ± 0.11 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.52 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 150.84 ± 1.38 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.24 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 2aa8bc0..37c3d79 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 85.70 ± 0.10 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 7.52 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 156.53 ± 2.33 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.29 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log index 7499112..b163e9b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 564.83 ± 6.58 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.68 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 538.66 ± 2.16 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.01 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log index 8947515..d8e45a2 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 624.99 ± 3.81 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.64 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 620.78 ± 3.75 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.74 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log index 1488828..4b2bee2 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 389.25 ± 2.01 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.66 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 389.41 ± 1.99 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.13 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log index d3d972e..0cc6bbd 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 412.18 ± 1.15 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.80 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 413.64 ± 1.55 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.76 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log index bb39f34..26ad0c6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 562.86 ± 10.14 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.74 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 543.05 ± 4.56 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.14 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log index e501cb6..c9f4948 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 418.07 ± 1.65 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.11 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 624.71 ± 4.66 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.74 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log index 87b8aec..ee0b076 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.74 ± 1.70 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.65 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.73 ± 2.27 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.30 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log index 17416ee..6fe28d1 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 301.31 ± 0.65 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.37 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 415.19 ± 1.76 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.59 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..a014b00 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 552.48 ± 1.39 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.22 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..2e173b9 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.51 ± 8.44 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.82 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..be7305a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 576.87 ± 7.86 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..9741f2c --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 609.51 ± 4.26 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.77 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log index 444ca09..9bb2bf0 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 570.31 ± 5.05 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.52 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 555.30 ± 3.11 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.34 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log index c11bc62..f4f3cce 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 612.79 ± 4.77 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.71 ± 2.77 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.77 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log index e17b52a..d707246 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 572.09 ± 8.22 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.45 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 575.05 ± 4.27 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.33 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 19e8321..1856861 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 605.49 ± 1.47 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.89 ± 4.53 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.69 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log index f2e3ece..ac9d02d 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 573.05 ± 6.77 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.80 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 549.65 ± 6.16 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.02 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log index 3fa7435..4848dd8 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 416.05 ± 3.44 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.33 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 621.80 ± 7.09 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.78 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log index db8de56..37ba478 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 576.38 ± 3.91 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.85 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 575.05 ± 3.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log index f19e470..75f7de4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 414.62 ± 3.23 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.22 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 614.05 ± 4.83 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.83 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log index 10609cc..e1f8a5a 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 733.40 ± 2.59 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 59.36 ± 0.05 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 1027.23 ± 5.64 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 63.42 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 486113e..460a956 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 725.54 ± 2.84 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 55.57 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1005.86 ± 4.35 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 59.12 ± 0.04 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log index 00cd713..e4e2810 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 392.54 ± 1.80 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 61.56 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 764.63 ± 3.75 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 64.77 ± 0.10 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 3123ba1..b854ab4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 403.74 ± 1.69 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 60.57 ± 0.08 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 801.22 ± 4.04 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.44 ± 0.12 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log index ba675a0..433401a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.26 ± 0.94 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 726.41 ± 1.42 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log index a834fd9..1e7652b 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 820.41 ± 1.59 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 822.38 ± 0.84 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log index 3b668bb..3242eea 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 213.40 ± 3.62 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 219.78 ± 3.65 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log index ed14086..8b27e59 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 224.20 ± 4.73 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 227.29 ± 2.29 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log index b6d34c8..3cdf4e3 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.70 ± 1.48 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 703.97 ± 0.49 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log index a9a4641..5d82b39 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 554.49 ± 0.62 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 803.68 ± 0.98 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log index 21731f6..2121415 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 220.22 ± 1.60 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.73 ± 0.32 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log index ecb65ad..299085e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 193.90 ± 1.19 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 220.75 ± 2.43 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.86 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..b9c2ba9 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 714.52 ± 1.47 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..dd2b67e --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 810.36 ± 1.88 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..b5de28a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.18 ± 0.86 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.14 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..3a710b2 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 797.91 ± 0.87 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log index f5c1c56..d2b180e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.04 ± 1.24 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.01 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 738.56 ± 1.66 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index c86174c..7e8c499 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 811.04 ± 1.22 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 810.24 ± 2.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log index 3f70b79..6871a37 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.99 ± 1.44 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.00 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.87 ± 1.69 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 4cc26aa..a0de968 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 794.90 ± 1.42 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 798.06 ± 1.45 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log index 301a2b5..48ce734 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.36 ± 0.48 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 738.50 ± 1.56 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log index 1ddb96e..a0dd7bf 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 569.66 ± 0.60 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 801.53 ± 1.48 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log index e22f092..352a972 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 750.36 ± 1.88 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.81 ± 0.96 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log index 01fed10..3bdf7bd 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 559.73 ± 0.51 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.79 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 791.04 ± 2.55 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log index 6d1ed7b..431a37b 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 680.44 ± 0.55 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.39 ± 0.03 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 679.86 ± 1.33 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.60 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 3c0a8e7..bbdf2e7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 371.66 ± 0.51 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.62 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 659.67 ± 0.72 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.50 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log index 5a3ee90..2c0ca73 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 502.88 ± 1.45 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 504.31 ± 3.20 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.14 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 8f4867f..f084df4 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 496.33 ± 1.83 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.02 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 501.78 ± 2.80 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log index 523552b..0199908 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 395.28 ± 0.22 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.96 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.72 ± 0.86 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log index f8b41b5..86b7295 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.37 ± 1.54 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.08 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.46 ± 1.37 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log index 1ce39b8..eb32966 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 79.42 ± 0.41 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 84.71 ± 8.12 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log index 29c9209..cc49148 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 89.19 ± 0.53 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 84.12 ± 9.82 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log index fcaf5b3..8dc76ac 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 398.35 ± 1.07 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 408.40 ± 1.09 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log index bb05e2f..f94a57d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 302.82 ± 2.53 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.49 ± 1.46 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log index f41ad73..4fb6887 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 59.13 ± 7.79 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 84.93 ± 8.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log index c07bd16..ee44cb5 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 61.26 ± 10.54 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 83.22 ± 10.78 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..3332c2a --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 412.86 ± 1.22 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..a8e7f5a --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 465.55 ± 1.95 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..df6e790 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 453.66 ± 0.77 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..89d3127 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 498.77 ± 0.53 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log index d48d219..282d7f2 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 418.46 ± 0.10 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 419.05 ± 0.86 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index ba0b9fa..2c4f703 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.83 ± 1.65 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.07 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.36 ± 1.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index f152fdb..4cda7ad 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 454.10 ± 1.09 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 451.57 ± 0.41 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index a66ffac..c745c9f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.43 ± 1.24 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.87 ± 0.47 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log index adb03dd..9f5c43c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 392.50 ± 0.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 422.00 ± 0.56 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index 5ec86d3..e43f6df 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 311.25 ± 0.72 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.45 ± 1.83 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log index bec2363..2253da3 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 451.69 ± 0.62 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 453.24 ± 0.64 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 3a00d5c..98a164a 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 324.43 ± 0.22 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 502.26 ± 0.81 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log index 9dfdc23..19c65b3 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 129.49 ± 0.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 4.06 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 134.52 ± 0.99 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.92 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 96c0e93..a1f09eb 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 137.67 ± 1.25 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 138.59 ± 1.23 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log index 4a3544a..8013724 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2033.46 ± 5.16 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.47 ± 0.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1886.62 ± 6.81 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.36 ± 5.10 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log index 78f4e0c..725652c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2276.86 ± 9.60 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2282.08 ± 7.86 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.40 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log index 54a23d2..3fa4608 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 727.18 ± 2.22 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.65 ± 0.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 713.12 ± 38.25 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 67.01 ± 5.06 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log index 24c6a23..978ff37 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 740.27 ± 10.38 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 676.80 ± 75.42 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 59.10 ± 2.57 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log index e669ba8..654df84 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2035.38 ± 4.03 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.40 ± 0.80 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1857.54 ± 7.32 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.34 ± 7.91 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log index 1562460..1842c2a 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1515.55 ± 8.10 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.20 ± 0.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2214.91 ± 7.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.36 ± 0.05 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log index b49eeff..83e72da 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 714.75 ± 27.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 66.10 ± 5.25 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 656.82 ± 60.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 63.81 ± 3.45 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log index f75c3de..60ffb7f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 596.86 ± 37.66 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 58.75 ± 3.09 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 663.36 ± 79.77 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 58.63 ± 2.61 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..0a762e2 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1830.34 ± 15.12 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 60.04 ± 4.39 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..b4e8cfb --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2178.17 ± 91.83 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 55.78 ± 3.20 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..9113e30 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2016.93 ± 4.81 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 58.29 ± 3.79 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..8260f58 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2094.58 ± 12.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 66.23 ± 8.72 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log index c7f6ba3..d25fd4e 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2014.60 ± 24.35 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 59.16 ± 3.76 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1865.95 ± 7.12 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.56 ± 0.09 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log index 0862fc6..fe4b290 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2191.77 ± 78.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 54.32 ± 2.65 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2252.60 ± 11.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.57 ± 0.05 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log index 4293b33..5483c20 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1991.71 ± 2.91 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 56.37 ± 3.40 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2011.51 ± 6.91 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.65 ± 0.03 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log index 1af2b23..e572ab1 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2096.22 ± 4.59 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 64.88 ± 0.05 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2111.65 ± 7.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 67.62 ± 4.71 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log index 7e86d5a..362a77a 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2027.41 ± 4.62 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.12 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1869.83 ± 5.67 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.48 ± 0.04 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log index a3497bf..39df255 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1550.55 ± 4.52 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.54 ± 0.06 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2229.43 ± 7.33 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.58 ± 0.03 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log index 51f19f4..53225e6 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1992.48 ± 7.34 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.05 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2014.48 ± 4.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.61 ± 0.04 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log index a65b575..d2c4064 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1474.15 ± 1.44 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.44 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2064.91 ± 7.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.03 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log index 2624621..65403f9 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1593.62 ± 2.90 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.26 ± 0.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1288.81 ± 206.13 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 86.61 ± 1.74 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index 20e82ce..14f5f51 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 936.52 ± 2.35 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 60.89 ± 0.10 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1149.64 ± 181.24 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.50 ± 1.74 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log index 6ba35a8..799317c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1515.05 ± 2.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 87.54 ± 0.18 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 967.51 ± 123.30 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 86.74 ± 1.45 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index a1a86a0..513be95 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1476.16 ± 5.12 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 82.48 ± 0.36 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 991.94 ± 120.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.61 ± 0.84 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log index 2b2057d..9e4b08e 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log @@ -2,14 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found +hipBLASLt error: Heuristic Fetch Failed! This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 622.16 ± 6.71 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 629.19 ± 3.98 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.79 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log index a8e0637..5fe051d 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 743.09 ± 4.89 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.76 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.88 ± 9.39 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.14 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log index d94bfc6..8a0d184 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.98 ± 0.72 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.86 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 364.08 ± 1.11 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.81 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log index 95ce008..aaf527f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 390.67 ± 0.97 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 400.84 ± 0.84 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.17 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log index 320bdde..41af850 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 617.00 ± 4.97 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.90 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 627.57 ± 4.14 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.78 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log index aaf9547..f097e15 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 543.39 ± 5.51 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.28 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 783.40 ± 1.22 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.15 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log index 717bdb1..7346088 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.18 ± 0.29 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 | - -build: 1fe00296 (6182) +HW Exception by GPU node-1 (Agent handle: 0x37f5d310) reason :GPU Hang +✖ ! [rocm6_4_3] gpt-oss-120b-F16 __hblt0 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log index a328319..8eda0fb 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 322.46 ± 0.46 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.33 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 402.16 ± 1.31 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.16 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..c69bc04 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.02 ± 4.28 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.00 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..816395b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 778.25 ± 3.40 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.25 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..3948df0 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 688.70 ± 7.72 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.83 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..9ccc2fb --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.39 ± 6.28 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.20 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log index bfcbd06..f7645fb 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 643.61 ± 7.14 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.89 ± 3.96 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.89 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log index 9e50477..c286eb1 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 736.33 ± 3.33 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.74 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.19 ± 5.64 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.18 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log index a3eb53e..ab867d3 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 651.63 ± 3.08 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 691.27 ± 4.90 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.83 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log index 422fcf2..07e6796 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 738.84 ± 9.12 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 772.44 ± 6.68 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.24 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log index 8fc2f66..34bd11a 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 649.28 ± 0.87 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.99 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.09 ± 7.25 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.86 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log index 35770c3..384d751 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.01 ± 3.85 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.38 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 779.77 ± 4.78 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.28 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log index 7ed9087..3cc22b3 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 659.79 ± 3.13 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.01 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 683.90 ± 5.36 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.77 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log index af54d9f..1f2d84e 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 553.65 ± 2.40 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.31 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 777.37 ± 4.77 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.17 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log index 9f2e80d..d68fc95 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.86 ± 1.68 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 34.19 ± 0.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 626.37 ± 2.34 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 35.23 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log index 9fa4616..10c4fa1 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 496.21 ± 1.71 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.64 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 723.80 ± 2.49 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.59 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log index 71ff53d..80e1e5e 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.09 ± 0.83 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.57 ± 0.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 401.61 ± 1.76 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.86 ± 0.03 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log index 547d915..2c9a045 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 243.96 ± 0.96 | -| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 444.61 ± 1.65 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.84 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log index f3ebd8d..3b2c5d3 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 606.86 ± 5.18 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 612.55 ± 6.58 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.08 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log index 78fca14..78f5bac 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 732.72 ± 4.06 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.14 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 766.08 ± 2.67 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.93 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log index 0436056..6d219e6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.42 ± 1.56 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.39 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 362.01 ± 1.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.04 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log index 4fe67c3..0808d48 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3273c340) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0x32c91340) reason :GPU Hang ✖ ! [rocm6_4_3-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log index f5b3307..7506230 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 608.20 ± 7.04 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.40 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 614.68 ± 3.32 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.04 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log index 7a9d128..8093c5f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 533.95 ± 3.58 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 768.28 ± 5.81 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.86 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log index 3f1f7ba..636f327 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 0.81 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.41 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 362.06 ± 1.45 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.11 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log index fa1e33d..957a4d0 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 320.78 ± 0.96 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.49 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 397.06 ± 1.41 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.01 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..85ade63 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 639.82 ± 2.41 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.29 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..28c282b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.98 ± 1.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.00 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..ccbcd7a --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 683.94 ± 2.89 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.29 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..8b573b2 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 778.15 ± 4.46 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.04 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log index 17a14c7..b346209 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 635.84 ± 5.72 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 641.91 ± 7.56 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.20 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 8695248..4300bb0 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 708.36 ± 12.96 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.17 ± 4.24 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.05 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 117d484..d80b648 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.68 ± 9.08 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 681.37 ± 3.54 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.19 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 173cfbe..428b0ae 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 734.35 ± 10.26 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.60 ± 3.18 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.09 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log index a5dcda7..b581add 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 646.07 ± 6.86 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.50 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 641.87 ± 3.27 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log index c8991e9..8aaf148 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 3.26 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.31 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 757.39 ± 3.80 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.04 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log index f2a91fb..cb85c29 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 657.58 ± 3.78 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.56 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 688.94 ± 3.90 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.06 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log index 698f338..53cccf3 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.79 ± 2.99 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 769.31 ± 5.48 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.07 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log index 9d4e9e1..fc9cedb 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.54 ± 2.45 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 49.29 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 682.60 ± 3.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 51.41 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index c4cd434..82270b1 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 540.81 ± 2.56 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 48.25 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 790.49 ± 4.84 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.15 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log index acd5b3a..f38971f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.24 ± 1.27 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 50.39 ± 0.05 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 426.15 ± 2.65 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 52.79 ± 0.16 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index f04d91c..d3f726b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 255.50 ± 1.49 | -| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.41 ± 0.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 478.69 ± 3.26 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.75 ± 0.06 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log index 45d2dea..802b5a6 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1198.51 ± 10.39 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1140.40 ± 8.72 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.24 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log index 219d081..5aa7e87 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.44 ± 7.03 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.30 ± 22.33 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.96 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log index 2c0a1a3..5cd945e 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 326.80 ± 4.56 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.13 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 327.64 ± 1.89 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.19 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log index b04117d..8086999 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 350.18 ± 5.10 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.09 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 342.77 ± 3.39 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.00 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log index 8d5aeeb..64e0253 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1185.57 ± 6.55 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.12 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1147.38 ± 6.40 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.24 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log index 4138942..2e70988 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1000.77 ± 2.37 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.59 ± 26.99 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.00 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log index 76ec711..4437a50 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 322.00 ± 4.37 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 326.33 ± 6.68 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.20 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log index 9ce8ee4..3ae063a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 303.26 ± 4.84 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 344.41 ± 7.32 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.96 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..2e92934 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1202.41 ± 13.79 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.03 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..e6da7d3 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1484.60 ± 5.26 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..7d9f2db --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1225.63 ± 9.42 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.25 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..d23467b --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1421.82 ± 12.16 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.95 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log index d80b5e6..8300d8a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1256.75 ± 10.54 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1209.21 ± 16.57 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.23 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log index 2648480..9cb1cb9 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1481.17 ± 9.67 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.03 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1489.00 ± 6.12 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log index 9c6df9d..9bb131b 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1202.19 ± 5.53 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.10 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1218.32 ± 13.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.21 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log index 8b68b31..2210bb8 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1422.90 ± 11.48 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1424.60 ± 8.06 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log index 8a71c43..1aa5162 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1253.01 ± 23.20 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1198.99 ± 21.23 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.25 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log index eaf7b98..dd419ad 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1005.24 ± 32.45 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.89 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.46 ± 15.83 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log index 685527d..9a0a4d9 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.02 ± 12.30 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1224.83 ± 11.58 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.20 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log index 18915d7..96a9cd4 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 985.58 ± 10.64 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.88 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1446.22 ± 17.28 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.99 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log index 381cc89..8e3da78 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 367.61 ± 1.90 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.69 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 469.06 ± 6.82 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 15.25 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log index 4212c20..efee579 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 386.12 ± 1.98 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 8.66 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 514.17 ± 6.35 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 15.13 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log index a3f7dda..7b2fe16 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 315.56 ± 1.40 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.86 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 537.62 ± 2.10 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 14.85 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log index 257f941..830fbc3 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 333.31 ± 1.47 | -| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.92 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 583.32 ± 3.38 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 14.86 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log index 2c92204..65b10c2 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1184.03 ± 8.37 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.07 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1164.39 ± 11.24 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log index cd2e38f..1edd08e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1480.28 ± 9.38 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.45 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1531.44 ± 9.83 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.78 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log index f7e992e..1f991a5 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.52 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.87 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 590.66 ± 1.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log index 2016c11..084ea0a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 643.25 ± 3.86 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.67 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 653.80 ± 1.17 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.72 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log index d5473e4..aae04b7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1171.02 ± 7.04 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.94 ± 0.04 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1160.12 ± 12.72 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.19 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log index abd5fd2..eb25e8e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 996.31 ± 6.53 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.68 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1539.79 ± 14.33 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.81 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log index be6782a..cee8253 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 582.51 ± 2.41 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.89 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 591.28 ± 2.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log index 90bc0d7..7dbc6fc 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 522.63 ± 1.74 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.66 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 655.10 ± 1.75 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.85 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..a52d858 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1222.12 ± 10.04 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.34 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..5070ee2 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1515.09 ± 6.22 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.75 ± 0.00 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..598c877 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1335.14 ± 17.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.33 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..8fe24ad --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1562.66 ± 9.76 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.84 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log index 8e4bdde..f28a63e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1236.64 ± 11.20 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.78 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1215.59 ± 8.93 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.39 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log index c87a02c..c75c722 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1460.58 ± 11.92 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.26 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1521.41 ± 10.84 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.89 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log index 2dd9efb..fdcb0a4 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1299.34 ± 7.77 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.85 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1334.89 ± 9.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.43 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log index fc2acae..f407d2f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1516.33 ± 21.51 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.40 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1567.58 ± 12.62 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.78 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log index 34dc4e8..0607b26 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1246.14 ± 8.32 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.15 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1219.34 ± 5.57 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.37 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log index f6feec3..120f9df 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1010.38 ± 6.35 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.49 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1530.70 ± 9.71 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.84 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log index 425b7fd..079a2c7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1303.74 ± 6.94 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.10 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1331.00 ± 21.19 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.41 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log index 0bd1151..1d5c6f3 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1037.92 ± 11.67 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.63 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1575.63 ± 16.60 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.76 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log index 10fffaa..3df5436 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1220.69 ± 8.95 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 71.42 ± 0.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1498.39 ± 12.53 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.08 ± 0.09 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index 185b49c..51abfd5 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1467.61 ± 12.70 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 69.47 ± 0.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1914.72 ± 22.77 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.57 ± 0.12 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log index cf40790..460e04d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 651.21 ± 5.24 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 72.35 ± 0.08 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1002.66 ± 7.71 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.77 ± 0.18 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index ed9a28f..2a7699d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 732.35 ± 7.51 | -| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.05 ± 0.07 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1204.49 ± 13.52 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 74.94 ± 0.14 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log index 2220109..a4d3dc7 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 981.76 ± 1.61 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.26 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.46 ± 1.57 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.02 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log index 79d7534..2b466fa 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1096.97 ± 5.09 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.33 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1100.15 ± 1.95 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log index 54ebe40..d2b3dfa 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.00 ± 0.44 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.39 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.31 ± 0.71 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log index 3d3918a..5c7f9e5 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 367.46 ± 0.31 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.20 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 366.05 ± 1.98 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.32 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log index 2c94893..638eede 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 978.30 ± 1.98 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.39 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.46 ± 2.10 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log index fd39b3e..6d11aa9 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 686.88 ± 0.38 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.80 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1105.32 ± 2.28 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.24 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log index 689c168..acf5f29 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.07 ± 0.50 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.36 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.63 ± 0.64 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.81 ± 0.00 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log index eca19dc..6b8a5cd 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 307.39 ± 0.70 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.75 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 368.28 ± 0.81 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.35 ± 0.01 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log new file mode 100644 index 0000000..4db56a5 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 977.63 ± 2.98 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.91 ± 0.02 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log new file mode 100644 index 0000000..0fa5959 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1097.55 ± 1.49 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.33 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log new file mode 100644 index 0000000..5636ade --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 860.30 ± 0.57 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.89 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log new file mode 100644 index 0000000..3b3feea --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 953.79 ± 3.60 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.35 ± 0.01 | + +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log index 075f5ba..08e2ece 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 978.15 ± 1.18 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.15 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 984.61 ± 2.65 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.94 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log index 669a6d8..be2842e 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1089.54 ± 1.93 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 46.47 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1095.50 ± 2.69 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.34 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log index 761a77a..98855b5 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 863.15 ± 2.90 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.09 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 859.46 ± 1.91 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log index 655dfd3..847066a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 947.88 ± 1.69 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 46.48 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 952.18 ± 1.68 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.32 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log index 8b649c5..d564f8b 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.59 ± 2.44 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.38 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 980.24 ± 1.40 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log index 05d74ad..e714e54 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 684.81 ± 1.14 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.97 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1100.05 ± 4.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log index 27bd0d7..bc6ac1c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 865.92 ± 1.53 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.40 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 860.23 ± 0.94 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.92 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log index 28a8543..f12456c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 630.67 ± 1.16 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 48.83 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 958.47 ± 2.31 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | -build: de219279 (6181) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log index 20052b6..6162d50 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1305.67 ± 1.36 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 48.48 ± 0.11 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1317.02 ± 4.04 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 53.59 ± 0.07 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index a9dca39..85db8fc 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1377.39 ± 0.62 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 47.91 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1380.42 ± 7.77 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 52.95 ± 0.07 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log index f19f729..979748b 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 875.74 ± 6.47 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 52.85 ± 0.12 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 868.70 ± 8.94 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 54.37 ± 0.04 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index 3c88a39..d3a0b5a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 957.61 ± 5.26 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 52.16 ± 0.08 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 957.23 ± 9.23 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.49 ± 0.04 | -build: 1fe00296 (6182) +build: f1fbffb5 (6486) diff --git a/docs/index.html b/docs/index.html index d8963c8..6cd3e4a 100644 --- a/docs/index.html +++ b/docs/index.html @@ -214,6 +214,13 @@ border: 1px solid #b19cff55; } + .faall { + background: #cfe9ff; + /* light blue chip */ + color: #000000; + border: 1px solid #9bc9ff55; + } + .meta { padding: 0 20px 14px; color: var(--muted); @@ -250,6 +257,21 @@ border-radius: 8px; } + .scroller-top { + overflow-x: auto; + overflow-y: hidden; + height: 12px; + /* slim */ + margin: 0 0 6px; + /* a little gap above the table */ + } + + .scroller-top .scroller-spacer { + height: 1px; + /* tiny content so the bar renders */ + } + + table { width: max-content; min-width: 100vw; @@ -415,28 +437,36 @@

Prompt Processing (pp512) — tokens/second

-
+
+
+
+
- +
+

Text Generation (tg128) — tokens/second

-
+
+
+
+
- +
+
diff --git a/docs/results.json b/docs/results.json index f632b47..695b5e1 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,15 +1,11 @@ { "meta": { - "generated_at": "2025-08-18T21:21:31Z", + "generated_at": "2025-09-16T21:05:07Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { - "hash": "1fe00296", - "number": "6182" - }, - { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } ], "environments": [ @@ -20,6 +16,8 @@ "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", + "rocm7_rc-rocwmma-fa_all_quants", + "rocm7_rc-rocwmma-fa_all_quants-hblt0", "rocm7_rc-rocwmma-hblt0", "vulkan_amdvlk", "vulkan_radv" @@ -35,8 +33,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 126.62, - "tps_std": 0.1, + "tps_mean": 125.93, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -48,8 +46,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -60,8 +58,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 19.95, - "tps_std": 0.02, + "tps_mean": 20.52, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -73,8 +71,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -85,8 +83,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 135.1, - "tps_std": 0.35, + "tps_mean": 135.4, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "ROCm", @@ -98,8 +96,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -110,8 +108,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 20.14, - "tps_std": 0.01, + "tps_mean": 20.69, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -123,8 +121,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -135,8 +133,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 130.99, - "tps_std": 0.36, + "tps_mean": 132.28, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -148,8 +146,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -160,8 +158,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 20.14, - "tps_std": 0.01, + "tps_mean": 20.5, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -173,8 +171,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -185,8 +183,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 140.15, - "tps_std": 0.41, + "tps_mean": 139.86, + "tps_std": 0.32, "error": false, "error_type": null, "backend": "ROCm", @@ -198,8 +196,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -210,8 +208,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 20.15, - "tps_std": 0.01, + "tps_mean": 20.7, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -223,8 +221,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -235,7 +233,607 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 126.66, + "tps_mean": 125.92, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 134.12, + "tps_std": 0.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 131.45, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 140.67, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 94.56, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 19.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 127.25, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 20.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 128.69, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 169.19, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 94.71, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 20.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 126.97, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 20.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 160.39, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 169.35, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 94.73, "tps_std": 0.22, "error": false, "error_type": null, @@ -246,354 +844,10 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 100.2, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 117.48, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 20.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 126.27, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 19.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 158.54, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 166.11, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 19.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 89.6, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -604,7 +858,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.22, + "tps_mean": 20.47, "tps_std": 0.0, "error": false, "error_type": null, @@ -617,8 +871,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -629,8 +883,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 64.66, - "tps_std": 0.16, + "tps_mean": 93.27, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", @@ -642,8 +896,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -654,7 +908,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 20.35, + "tps_mean": 20.67, "tps_std": 0.0, "error": false, "error_type": null, @@ -667,8 +921,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -678,20 +932,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 159.89, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -700,20 +982,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 170.42, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -723,8 +1033,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 197.95, - "tps_std": 0.29, + "tps_mean": 217.22, + "tps_std": 0.49, "error": false, "error_type": null, "backend": "Vulkan", @@ -736,8 +1046,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -748,107 +1058,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 23.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 199.4, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 23.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 126.28, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 23.33, + "tps_mean": 24.18, "tps_std": 0.01, "error": false, "error_type": null, @@ -859,10 +1069,110 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 219.61, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 24.21, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 212.6, + "tps_std": 0.74, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 24.18, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -873,8 +1183,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 131.64, - "tps_std": 0.32, + "tps_mean": 224.85, + "tps_std": 2.55, "error": false, "error_type": null, "backend": "Vulkan", @@ -886,8 +1196,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -898,7 +1208,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 23.88, + "tps_mean": 24.64, "tps_std": 0.01, "error": false, "error_type": null, @@ -911,8 +1221,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -923,8 +1233,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 121.82, - "tps_std": 0.35, + "tps_mean": 120.87, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "ROCm", @@ -936,8 +1246,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -948,7 +1258,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 15.59, + "tps_mean": 15.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -961,8 +1271,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -973,8 +1283,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 126.6, - "tps_std": 0.3, + "tps_mean": 128.65, + "tps_std": 0.59, "error": false, "error_type": null, "backend": "ROCm", @@ -986,8 +1296,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -998,8 +1308,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 15.62, - "tps_std": 0.04, + "tps_mean": 15.96, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1011,8 +1321,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1026,7 +1336,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -1048,7 +1358,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -1067,8 +1377,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 117.95, - "tps_std": 0.3, + "tps_mean": 120.53, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "ROCm", @@ -1080,8 +1390,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1092,8 +1402,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.65, - "tps_std": 0.01, + "tps_mean": 15.87, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1105,8 +1415,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1116,20 +1426,48 @@ "env_base": "rocm6_4_3", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 129.22, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 15.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1142,7 +1480,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -1160,55 +1498,33 @@ "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "test": "pp512", + "tps_mean": 128.68, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 69.19, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, "test": "tg128", - "tps_mean": 15.64, + "tps_mean": 15.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -1219,22 +1535,22 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, + "env_variant": "rocwmma-fa_all_quants", + "fa": false, "test": "pp512", - "tps_mean": 114.61, - "tps_std": 0.2, + "tps_mean": 91.95, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "ROCm", @@ -1244,21 +1560,21 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, + "env_variant": "rocwmma-fa_all_quants", + "fa": false, "test": "tg128", - "tps_mean": 15.51, + "tps_mean": 15.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -1269,121 +1585,21 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 120.88, - "tps_std": 0.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.61, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma-fa_all_quants", "fa": true, "test": "pp512", - "tps_mean": 150.07, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 15.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 69.52, + "tps_mean": 70.0, "tps_std": 0.17, "error": false, "error_type": null, @@ -1394,21 +1610,21 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma-fa_all_quants", + "fa": true, "test": "tg128", - "tps_mean": 15.63, + "tps_mean": 15.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -1419,407 +1635,757 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 74.02, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env_variant": "rocwmma-fa_all_quants-hblt0", "fa": false, "test": "pp512", - "tps_mean": 142.67, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 219.81, - "tps_std": 0.7, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 16.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 222.2, - "tps_std": 0.63, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.82, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 126.55, - "tps_std": 0.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 131.25, + "tps_mean": 134.22, "tps_std": 0.5, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, "test": "tg128", - "tps_mean": 17.31, + "tps_mean": 15.9, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, "test": "pp512", - "tps_mean": 98.02, - "tps_std": 0.18, + "tps_mean": 159.75, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, "test": "tg128", - "tps_mean": 2.77, + "tps_mean": 15.99, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 92.18, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 15.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 101.83, + "tps_mean": 121.75, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 15.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 151.32, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 161.1, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 15.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 92.2, "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 71.02, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 15.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 147.32, + "tps_std": 0.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 161.37, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 15.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 264.5, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 267.86, + "tps_std": 1.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 17.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 208.01, + "tps_std": 0.73, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.49, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 221.63, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 17.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 101.82, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1830,7 +2396,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 2.77, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -1843,8 +2409,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1899,8 +2465,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 97.13, - "tps_std": 0.17, + "tps_mean": 94.79, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -1912,8 +2478,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1937,8 +2503,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1949,7 +2515,7 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 80.42, + "tps_mean": 104.62, "tps_std": 0.08, "error": false, "error_type": null, @@ -1962,8 +2528,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -1987,8 +2553,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2038,13 +2604,13 @@ { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "pp512", - "tps_mean": 97.31, - "tps_std": 0.2, + "tps_mean": 98.15, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -2054,70 +2620,20 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 100.85, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", "tps_mean": 2.77, "tps_std": 0.0, "error": false, @@ -2129,21 +2645,71 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 102.79, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", "fa": false, "test": "pp512", - "tps_mean": 93.0, + "tps_mean": 93.89, "tps_std": 0.22, "error": false, "error_type": null, @@ -2154,10 +2720,210 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 97.53, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 97.42, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 101.56, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 92.02, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2181,8 +2947,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2193,8 +2959,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 97.88, - "tps_std": 0.09, + "tps_mean": 97.1, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -2206,8 +2972,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2218,7 +2984,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 2.77, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -2231,8 +2997,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2243,154 +3009,210 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 99.41, + "tps_mean": 95.12, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 2.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 103.16, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 93.86, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 95.87, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 97.72, "tps_std": 0.36, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 94.06, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 98.03, - "tps_std": 0.24, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -2400,8 +3222,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2412,7 +3234,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 2.78, + "tps_mean": 2.81, "tps_std": 0.0, "error": false, "error_type": null, @@ -2425,8 +3247,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2437,8 +3259,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 99.12, - "tps_std": 0.25, + "tps_mean": 99.04, + "tps_std": 0.31, "error": false, "error_type": null, "backend": "Vulkan", @@ -2450,8 +3272,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2462,7 +3284,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 2.77, + "tps_mean": 2.8, "tps_std": 0.0, "error": false, "error_type": null, @@ -2475,8 +3297,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2487,8 +3309,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 75.59, - "tps_std": 0.28, + "tps_mean": 78.94, + "tps_std": 0.51, "error": false, "error_type": null, "backend": "Vulkan", @@ -2500,8 +3322,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2525,8 +3347,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2537,8 +3359,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 80.09, - "tps_std": 0.38, + "tps_mean": 80.9, + "tps_std": 0.77, "error": false, "error_type": null, "backend": "Vulkan", @@ -2550,8 +3372,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2575,8 +3397,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2586,11 +3408,111 @@ "env_base": "rocm6_4_3", "env_variant": "rocwmma", "fa": false, + "test": "pp512", + "tps_mean": 265.76, + "tps_std": 0.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 289.14, + "tps_std": 1.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.64, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -2598,109 +3520,9 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 291.08, - "tps_std": 1.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 134.19, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.56, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -2730,23 +3552,20 @@ "env_base": "rocm6_4_3", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 270.28, - "tps_std": 1.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -2754,10 +3573,10 @@ "env": "rocm6_4_3", "env_base": "rocm6_4_3", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.58, - "tps_std": 0.03, + "fa": true, + "test": "pp512", + "tps_mean": 291.67, + "tps_std": 0.91, "error": false, "error_type": null, "backend": "ROCm", @@ -2767,10 +3586,10 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2780,20 +3599,23 @@ "env_base": "rocm6_4_3", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "tg128", + "tps_mean": 14.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -2839,6 +3661,206 @@ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log", "build": null }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 276.44, + "tps_std": 1.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 14.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 292.67, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 14.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 273.88, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 284.81, + "tps_std": 1.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -2846,20 +3868,48 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 274.13, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -2869,8 +3919,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 285.84, - "tps_std": 9.41, + "tps_mean": 292.92, + "tps_std": 2.63, "error": false, "error_type": null, "backend": "ROCm", @@ -2882,8 +3932,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2894,7 +3944,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 14.37, + "tps_mean": 14.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -2907,8 +3957,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2919,8 +3969,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 273.97, - "tps_std": 1.67, + "tps_mean": 273.23, + "tps_std": 1.35, "error": false, "error_type": null, "backend": "ROCm", @@ -2932,8 +3982,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2944,8 +3994,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 14.57, - "tps_std": 0.05, + "tps_mean": 14.7, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -2957,8 +4007,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -2968,48 +4018,20 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 285.26, - "tps_std": 1.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.33, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3018,48 +4040,20 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": false, - "test": "pp512", - "tps_mean": 276.37, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.57, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } + "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3072,7 +4066,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -3091,8 +4085,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 269.17, - "tps_std": 0.99, + "tps_mean": 274.52, + "tps_std": 1.78, "error": false, "error_type": null, "backend": "ROCm", @@ -3104,8 +4098,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3116,8 +4110,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 14.63, - "tps_std": 0.01, + "tps_mean": 14.7, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3129,8 +4123,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3140,20 +4134,48 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 287.04, + "tps_std": 1.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -3163,8 +4185,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 242.07, - "tps_std": 1.05, + "tps_mean": 224.02, + "tps_std": 2.86, "error": false, "error_type": null, "backend": "Vulkan", @@ -3176,8 +4198,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3188,57 +4210,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.56, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 244.49, - "tps_std": 1.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.33, + "tps_mean": 15.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -3249,71 +4221,21 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 147.08, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.5, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 149.97, + "tps_mean": 234.3, "tps_std": 1.1, "error": false, "error_type": null, @@ -3324,10 +4246,110 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 15.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 201.49, + "tps_std": 2.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.77, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 202.49, + "tps_std": 5.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3338,7 +4360,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.49, + "tps_mean": 15.74, "tps_std": 0.0, "error": false, "error_type": null, @@ -3351,8 +4373,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3363,8 +4385,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 270.35, - "tps_std": 3.39, + "tps_mean": 264.44, + "tps_std": 24.69, "error": false, "error_type": null, "backend": "ROCm", @@ -3376,8 +4398,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3388,8 +4410,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 11.78, - "tps_std": 0.03, + "tps_mean": 11.88, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -3401,8 +4423,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3413,8 +4435,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 292.23, - "tps_std": 3.13, + "tps_mean": 298.83, + "tps_std": 1.59, "error": false, "error_type": null, "backend": "ROCm", @@ -3426,8 +4448,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3438,8 +4460,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 11.73, - "tps_std": 0.03, + "tps_mean": 11.89, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -3451,8 +4473,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3484,56 +4506,6 @@ "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 140.27, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, "test": null, "tps_mean": null, "tps_std": null, @@ -3546,9 +4518,59 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": null }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 274.49, + "tps_std": 1.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 11.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -3556,20 +4578,48 @@ "env_base": "rocm6_4_3", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 298.07, + "tps_std": 2.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 11.89, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", @@ -3604,7 +4654,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -3618,13 +4668,13 @@ { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "pp512", - "tps_mean": 279.13, - "tps_std": 2.9, + "tps_mean": 275.21, + "tps_std": 1.93, "error": false, "error_type": null, "backend": "ROCm", @@ -3634,22 +4684,22 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "tg128", - "tps_mean": 11.79, - "tps_std": 0.07, + "tps_mean": 11.85, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3659,22 +4709,22 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": true, "test": "pp512", - "tps_mean": 293.6, - "tps_std": 3.84, + "tps_mean": 292.69, + "tps_std": 2.25, "error": false, "error_type": null, "backend": "ROCm", @@ -3684,22 +4734,22 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": true, "test": "tg128", - "tps_mean": 11.62, - "tps_std": 0.02, + "tps_mean": 11.91, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3709,22 +4759,22 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma-fa_all_quants-hblt0", "fa": false, "test": "pp512", - "tps_mean": 264.02, - "tps_std": 2.74, + "tps_mean": 267.51, + "tps_std": 12.72, "error": false, "error_type": null, "backend": "ROCm", @@ -3734,71 +4784,21 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma-fa_all_quants-hblt0", "fa": false, "test": "tg128", - "tps_mean": 11.79, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 279.69, - "tps_std": 2.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.6, + "tps_mean": 11.9, "tps_std": 0.04, "error": false, "error_type": null, @@ -3809,10 +4809,260 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 286.25, + "tps_std": 4.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.9, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 260.6, + "tps_std": 10.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 11.82, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 293.26, + "tps_std": 3.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 11.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 275.91, + "tps_std": 1.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 260.83, + "tps_std": 5.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.82, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3823,8 +5073,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 252.38, - "tps_std": 7.7, + "tps_mean": 279.56, + "tps_std": 3.76, "error": false, "error_type": null, "backend": "ROCm", @@ -3836,8 +5086,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3848,8 +5098,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 11.35, - "tps_std": 0.6, + "tps_mean": 11.88, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -3861,8 +5111,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3876,7 +5126,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -3895,8 +5145,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 271.54, - "tps_std": 4.1, + "tps_mean": 278.61, + "tps_std": 2.47, "error": false, "error_type": null, "backend": "ROCm", @@ -3908,8 +5158,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3920,8 +5170,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 11.57, - "tps_std": 0.58, + "tps_mean": 11.92, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3933,8 +5183,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3948,7 +5198,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -3967,8 +5217,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 258.54, - "tps_std": 1.39, + "tps_mean": 343.36, + "tps_std": 1.37, "error": false, "error_type": null, "backend": "Vulkan", @@ -3980,8 +5230,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -3992,7 +5242,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 12.45, + "tps_mean": 12.57, "tps_std": 0.01, "error": false, "error_type": null, @@ -4005,8 +5255,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4017,8 +5267,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 262.84, - "tps_std": 1.39, + "tps_mean": 347.56, + "tps_std": 1.15, "error": false, "error_type": null, "backend": "Vulkan", @@ -4030,8 +5280,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4042,7 +5292,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.3, + "tps_mean": 12.42, "tps_std": 0.01, "error": false, "error_type": null, @@ -4055,8 +5305,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4067,8 +5317,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 169.23, - "tps_std": 0.84, + "tps_mean": 244.52, + "tps_std": 1.08, "error": false, "error_type": null, "backend": "Vulkan", @@ -4080,8 +5330,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4092,7 +5342,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 12.45, + "tps_mean": 12.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -4105,8 +5355,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4117,8 +5367,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 173.79, - "tps_std": 0.85, + "tps_mean": 253.13, + "tps_std": 1.36, "error": false, "error_type": null, "backend": "Vulkan", @@ -4130,8 +5380,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4142,8 +5392,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.44, - "tps_std": 0.01, + "tps_mean": 12.56, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -4155,8 +5405,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4166,306 +5416,6 @@ "env_base": "rocm6_4_3", "env_variant": "rocwmma", "fa": false, - "test": "pp512", - "tps_mean": 285.51, - "tps_std": 1.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.7, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 308.62, - "tps_std": 2.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 137.71, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.71, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 142.62, - "tps_std": 0.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 286.37, - "tps_std": 1.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 234.68, - "tps_std": 1.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, "test": null, "tps_mean": null, "tps_std": null, @@ -4478,8 +5428,308 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 307.79, + "tps_std": 3.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 138.51, + "tps_std": 0.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 142.41, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 282.5, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 306.89, + "tps_std": 1.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 17.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 137.98, + "tps_std": 0.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -4488,11 +5738,461 @@ "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": true, + "test": "pp512", + "tps_mean": 143.18, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 283.86, + "tps_std": 1.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 17.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 299.13, + "tps_std": 2.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 17.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 286.66, + "tps_std": 1.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 300.0, + "tps_std": 1.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 284.38, + "tps_std": 0.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 17.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 306.4, + "tps_std": 1.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 286.08, + "tps_std": 2.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 297.71, + "tps_std": 1.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -4500,259 +6200,9 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 290.54, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 304.99, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 283.93, - "tps_std": 1.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 300.13, - "tps_std": 1.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 291.6, - "tps_std": 1.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", @@ -4783,8 +6233,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 285.56, - "tps_std": 1.41, + "tps_mean": 284.17, + "tps_std": 2.14, "error": false, "error_type": null, "backend": "ROCm", @@ -4796,8 +6246,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4808,7 +6258,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 17.72, + "tps_mean": 17.8, "tps_std": 0.0, "error": false, "error_type": null, @@ -4821,8 +6271,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4833,110 +6283,110 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 227.75, + "tps_mean": 300.96, + "tps_std": 1.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 191.71, + "tps_std": 1.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 21.03, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 193.39, "tps_std": 1.52, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 216.64, - "tps_std": 2.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.39, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 217.68, - "tps_std": 4.15, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -4946,8 +6396,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4958,7 +6408,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 19.97, + "tps_mean": 20.61, "tps_std": 0.01, "error": false, "error_type": null, @@ -4971,8 +6421,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -4983,8 +6433,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 151.98, - "tps_std": 0.6, + "tps_mean": 213.71, + "tps_std": 2.99, "error": false, "error_type": null, "backend": "Vulkan", @@ -4996,8 +6446,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5008,8 +6458,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.26, - "tps_std": 0.02, + "tps_mean": 20.87, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -5021,8 +6471,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5033,8 +6483,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 154.96, - "tps_std": 0.82, + "tps_mean": 217.08, + "tps_std": 5.59, "error": false, "error_type": null, "backend": "Vulkan", @@ -5046,8 +6496,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5058,7 +6508,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 20.28, + "tps_mean": 20.85, "tps_std": 0.01, "error": false, "error_type": null, @@ -5071,8 +6521,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5083,8 +6533,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 130.11, - "tps_std": 0.68, + "tps_mean": 129.22, + "tps_std": 0.43, "error": false, "error_type": null, "backend": "ROCm", @@ -5096,8 +6546,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5108,8 +6558,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 13.95, - "tps_std": 0.04, + "tps_mean": 14.25, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5121,8 +6571,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5133,8 +6583,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 144.31, - "tps_std": 0.8, + "tps_mean": 143.48, + "tps_std": 1.3, "error": false, "error_type": null, "backend": "ROCm", @@ -5146,8 +6596,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5158,7 +6608,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 13.71, + "tps_mean": 14.32, "tps_std": 0.0, "error": false, "error_type": null, @@ -5171,8 +6621,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5204,20 +6654,48 @@ "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, + "test": "pp512", + "tps_mean": 75.22, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.29, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5227,8 +6705,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 131.78, - "tps_std": 1.03, + "tps_mean": 129.64, + "tps_std": 0.35, "error": false, "error_type": null, "backend": "ROCm", @@ -5240,8 +6718,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5252,8 +6730,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 13.68, - "tps_std": 0.43, + "tps_mean": 14.24, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5265,8 +6743,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5276,20 +6754,48 @@ "env_base": "rocm6_4_3", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, + "test": "pp512", + "tps_mean": 144.82, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5298,20 +6804,23 @@ "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, + "test": "pp512", + "tps_mean": 74.17, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5319,228 +6828,9 @@ "env": "rocm6_4_3-hblt0", "env_base": "rocm6_4_3", "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 141.61, - "tps_std": 0.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.34, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 133.33, - "tps_std": 0.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 13.78, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 139.6, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.03, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 134.95, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 13.99, + "tps_mean": 14.27, "tps_std": 0.01, "error": false, "error_type": null, @@ -5551,10 +6841,510 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 77.91, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 131.98, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 14.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 141.08, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 14.29, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 130.87, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 137.23, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.32, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 132.6, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.33, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 143.76, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 134.24, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.32, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 141.84, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 134.45, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5564,20 +7354,48 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, + "test": "pp512", + "tps_mean": 145.01, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": null + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.36, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", @@ -5587,8 +7405,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 135.29, - "tps_std": 0.51, + "tps_mean": 133.4, + "tps_std": 0.48, "error": false, "error_type": null, "backend": "ROCm", @@ -5600,8 +7418,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5612,7 +7430,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 13.97, + "tps_mean": 14.32, "tps_std": 0.04, "error": false, "error_type": null, @@ -5625,8 +7443,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5659,8 +7477,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 112.93, - "tps_std": 0.63, + "tps_mean": 131.33, + "tps_std": 1.43, "error": false, "error_type": null, "backend": "Vulkan", @@ -5672,8 +7490,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5684,7 +7502,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 16.43, + "tps_mean": 17.27, "tps_std": 0.01, "error": false, "error_type": null, @@ -5697,8 +7515,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5709,8 +7527,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 114.35, - "tps_std": 1.12, + "tps_mean": 133.32, + "tps_std": 1.63, "error": false, "error_type": null, "backend": "Vulkan", @@ -5722,8 +7540,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5734,7 +7552,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 16.27, + "tps_mean": 17.12, "tps_std": 0.01, "error": false, "error_type": null, @@ -5747,8 +7565,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5759,8 +7577,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 64.6, - "tps_std": 0.38, + "tps_mean": 115.77, + "tps_std": 1.42, "error": false, "error_type": null, "backend": "Vulkan", @@ -5772,8 +7590,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5784,7 +7602,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.03, + "tps_mean": 17.75, "tps_std": 0.01, "error": false, "error_type": null, @@ -5797,8 +7615,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5809,8 +7627,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 66.6, - "tps_std": 0.42, + "tps_mean": 121.8, + "tps_std": 1.81, "error": false, "error_type": null, "backend": "Vulkan", @@ -5822,8 +7640,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5834,8 +7652,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 17.28, - "tps_std": 0.01, + "tps_mean": 18.1, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -5847,8 +7665,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5859,8 +7677,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 438.42, - "tps_std": 4.14, + "tps_mean": 426.32, + "tps_std": 6.04, "error": false, "error_type": null, "backend": "ROCm", @@ -5872,8 +7690,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5884,7 +7702,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 24.57, + "tps_mean": 25.0, "tps_std": 0.01, "error": false, "error_type": null, @@ -5897,8 +7715,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5909,8 +7727,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 475.43, - "tps_std": 7.4, + "tps_mean": 479.22, + "tps_std": 4.77, "error": false, "error_type": null, "backend": "ROCm", @@ -5922,8 +7740,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5934,8 +7752,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 24.08, - "tps_std": 0.0, + "tps_mean": 24.91, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -5947,8 +7765,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5959,8 +7777,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 158.13, - "tps_std": 2.4, + "tps_mean": 158.56, + "tps_std": 4.2, "error": false, "error_type": null, "backend": "ROCm", @@ -5972,8 +7790,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -5984,7 +7802,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 24.58, + "tps_mean": 25.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -5997,8 +7815,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6009,8 +7827,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 163.4, - "tps_std": 3.21, + "tps_mean": 165.57, + "tps_std": 2.56, "error": false, "error_type": null, "backend": "ROCm", @@ -6022,8 +7840,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6034,7 +7852,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 24.14, + "tps_mean": 24.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -6047,8 +7865,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6059,8 +7877,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 441.36, - "tps_std": 3.35, + "tps_mean": 432.19, + "tps_std": 6.24, "error": false, "error_type": null, "backend": "ROCm", @@ -6072,8 +7890,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6084,8 +7902,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 24.6, - "tps_std": 0.01, + "tps_mean": 25.02, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -6097,8 +7915,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6109,8 +7927,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 337.36, - "tps_std": 3.48, + "tps_mean": 477.24, + "tps_std": 5.25, "error": false, "error_type": null, "backend": "ROCm", @@ -6122,8 +7940,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6134,7 +7952,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 24.45, + "tps_mean": 24.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -6147,8 +7965,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6159,8 +7977,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 161.73, - "tps_std": 1.23, + "tps_mean": 162.44, + "tps_std": 4.25, "error": false, "error_type": null, "backend": "ROCm", @@ -6172,8 +7990,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6184,7 +8002,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 24.58, + "tps_mean": 25.05, "tps_std": 0.01, "error": false, "error_type": null, @@ -6197,8 +8015,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6209,8 +8027,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 143.05, - "tps_std": 2.1, + "tps_mean": 160.17, + "tps_std": 3.26, "error": false, "error_type": null, "backend": "ROCm", @@ -6222,8 +8040,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -6234,7 +8052,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 24.42, + "tps_mean": 24.91, "tps_std": 0.01, "error": false, "error_type": null, @@ -6247,20 +8065,20 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "pp512", - "tps_mean": 448.63, - "tps_std": 5.9, + "tps_mean": 435.53, + "tps_std": 2.47, "error": false, "error_type": null, "backend": "ROCm", @@ -6270,290 +8088,18 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 24.96, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 473.34, - "tps_std": 8.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 23.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 338.07, - "tps_std": 3.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 24.93, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 342.57, - "tps_std": 3.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 23.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 444.3, - "tps_std": 6.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 333.42, - "tps_std": 6.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env_variant": "rocwmma-fa_all_quants", "fa": false, "test": "tg128", "tps_mean": 24.69, @@ -6567,943 +8113,21 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc-rocwmma-fa_all_quants", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 30.0, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 106.47, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 8.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "env_variant": "rocwmma-fa_all_quants", "fa": true, "test": "pp512", - "tps_mean": 106.77, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 8.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 84.71, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 7.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 85.7, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 564.83, - "tps_std": 6.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.68, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 624.99, - "tps_std": 3.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 48.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 389.25, - "tps_std": 2.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.66, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 412.18, - "tps_std": 1.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 48.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 562.86, - "tps_std": 10.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 418.07, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 50.11, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 387.74, - "tps_std": 1.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 301.31, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 50.37, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 570.31, - "tps_std": 5.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.52, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 612.79, - "tps_std": 4.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 46.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 572.09, - "tps_std": 8.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.45, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 605.49, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 46.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 573.05, - "tps_std": 6.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 416.05, - "tps_std": 3.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 50.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 576.38, + "tps_mean": 476.36, "tps_std": 3.91, "error": false, "error_type": null, @@ -7511,13 +8135,1663 @@ "ngl": 99, "mmap": 0, "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 24.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 330.47, + "tps_std": 5.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 343.19, + "tps_std": 4.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 24.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 431.59, + "tps_std": 5.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 25.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 476.09, + "tps_std": 5.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 24.93, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 332.32, + "tps_std": 3.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.11, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 344.55, + "tps_std": 3.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 24.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 431.29, + "tps_std": 3.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 25.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 475.35, + "tps_std": 3.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 24.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 329.24, + "tps_std": 2.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 348.53, + "tps_std": 5.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 24.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 139.51, + "tps_std": 0.9, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 8.31, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 140.62, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 8.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 150.84, + "tps_std": 1.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 8.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 156.53, + "tps_std": 2.33, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 8.29, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 538.66, + "tps_std": 2.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 53.01, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 620.78, + "tps_std": 3.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 52.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 389.41, + "tps_std": 1.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 53.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 413.64, + "tps_std": 1.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 52.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 543.05, + "tps_std": 4.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 53.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 624.71, + "tps_std": 4.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 52.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 387.73, + "tps_std": 2.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 53.3, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 415.19, + "tps_std": 1.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 52.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 552.48, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 53.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 618.51, + "tps_std": 8.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 52.82, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 576.87, + "tps_std": 7.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 53.42, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 609.51, + "tps_std": 4.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 52.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 555.3, + "tps_std": 3.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 53.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 618.71, + "tps_std": 2.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 52.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 575.05, + "tps_std": 4.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 53.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 618.89, + "tps_std": 4.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 52.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 549.65, + "tps_std": 6.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 53.42, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 621.8, + "tps_std": 7.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 52.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 575.05, + "tps_std": 3.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7528,7 +9802,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 50.85, + "tps_mean": 53.42, "tps_std": 0.01, "error": false, "error_type": null, @@ -7541,8 +9815,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7553,8 +9827,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 414.62, - "tps_std": 3.23, + "tps_mean": 614.05, + "tps_std": 4.83, "error": false, "error_type": null, "backend": "ROCm", @@ -7566,8 +9840,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7578,7 +9852,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 50.22, + "tps_mean": 52.83, "tps_std": 0.01, "error": false, "error_type": null, @@ -7591,8 +9865,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7603,8 +9877,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 733.4, - "tps_std": 2.59, + "tps_mean": 1027.23, + "tps_std": 5.64, "error": false, "error_type": null, "backend": "Vulkan", @@ -7616,8 +9890,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7628,8 +9902,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 59.36, - "tps_std": 0.05, + "tps_mean": 63.42, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -7641,8 +9915,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7653,8 +9927,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 725.54, - "tps_std": 2.84, + "tps_mean": 1005.86, + "tps_std": 4.35, "error": false, "error_type": null, "backend": "Vulkan", @@ -7666,8 +9940,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7678,8 +9952,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 55.57, - "tps_std": 0.02, + "tps_mean": 59.12, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -7691,8 +9965,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7703,8 +9977,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 392.54, - "tps_std": 1.8, + "tps_mean": 764.63, + "tps_std": 3.75, "error": false, "error_type": null, "backend": "Vulkan", @@ -7716,8 +9990,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7728,8 +10002,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 61.56, - "tps_std": 0.02, + "tps_mean": 64.77, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", @@ -7741,8 +10015,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7753,8 +10027,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 403.74, - "tps_std": 1.69, + "tps_mean": 801.22, + "tps_std": 4.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -7766,8 +10040,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7778,8 +10052,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 60.57, - "tps_std": 0.08, + "tps_mean": 63.44, + "tps_std": 0.12, "error": false, "error_type": null, "backend": "Vulkan", @@ -7791,8 +10065,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -7803,557 +10077,7 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 734.26, - "tps_std": 0.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 820.41, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 213.4, - "tps_std": 3.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 224.2, - "tps_std": 4.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 734.7, - "tps_std": 1.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 554.49, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 220.22, - "tps_std": 1.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 193.9, - "tps_std": 1.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 751.04, - "tps_std": 1.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 811.04, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 752.99, - "tps_std": 1.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 794.9, + "tps_mean": 726.41, "tps_std": 1.42, "error": false, "error_type": null, @@ -8364,21 +10088,171 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 822.38, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 13.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 219.78, + "tps_std": 3.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 227.29, + "tps_std": 2.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 13.45, + "tps_mean": 13.87, "tps_std": 0.0, "error": false, "error_type": null, @@ -8389,22 +10263,22 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 752.36, - "tps_std": 0.48, + "tps_mean": 703.97, + "tps_std": 0.49, "error": false, "error_type": null, "backend": "ROCm", @@ -8414,21 +10288,21 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 14.05, + "tps_mean": 14.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -8439,22 +10313,22 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 569.66, - "tps_std": 0.6, + "tps_mean": 803.68, + "tps_std": 0.98, "error": false, "error_type": null, "backend": "ROCm", @@ -8464,21 +10338,21 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 13.78, + "tps_mean": 13.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -8489,21 +10363,171 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 750.36, + "tps_mean": 222.73, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 220.75, + "tps_std": 2.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 714.52, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 810.36, "tps_std": 1.88, "error": false, "error_type": null, @@ -8514,10 +10538,460 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 13.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 752.18, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 797.91, + "tps_std": 0.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 738.56, + "tps_std": 1.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 810.24, + "tps_std": 2.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 13.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 751.87, + "tps_std": 1.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 798.06, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 13.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 738.5, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 801.53, + "tps_std": 1.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 13.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 751.81, + "tps_std": 0.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8528,7 +11002,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 14.05, + "tps_mean": 14.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -8541,8 +11015,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8553,8 +11027,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 559.73, - "tps_std": 0.51, + "tps_mean": 791.04, + "tps_std": 2.55, "error": false, "error_type": null, "backend": "ROCm", @@ -8566,8 +11040,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8578,7 +11052,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 13.79, + "tps_mean": 13.87, "tps_std": 0.0, "error": false, "error_type": null, @@ -8591,8 +11065,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8603,8 +11077,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 680.44, - "tps_std": 0.55, + "tps_mean": 679.86, + "tps_std": 1.33, "error": false, "error_type": null, "backend": "Vulkan", @@ -8616,8 +11090,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8628,7 +11102,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 14.39, + "tps_mean": 14.6, "tps_std": 0.03, "error": false, "error_type": null, @@ -8641,8 +11115,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8653,8 +11127,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 371.66, - "tps_std": 0.51, + "tps_mean": 659.67, + "tps_std": 0.72, "error": false, "error_type": null, "backend": "Vulkan", @@ -8666,8 +11140,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8678,7 +11152,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.62, + "tps_mean": 14.5, "tps_std": 0.0, "error": false, "error_type": null, @@ -8691,8 +11165,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8703,8 +11177,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 502.88, - "tps_std": 1.45, + "tps_mean": 504.31, + "tps_std": 3.2, "error": false, "error_type": null, "backend": "Vulkan", @@ -8716,8 +11190,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8728,8 +11202,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 14.21, - "tps_std": 0.0, + "tps_mean": 14.14, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -8741,8 +11215,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8753,8 +11227,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 496.33, - "tps_std": 1.83, + "tps_mean": 501.78, + "tps_std": 2.8, "error": false, "error_type": null, "backend": "Vulkan", @@ -8766,8 +11240,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8778,8 +11252,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 14.02, - "tps_std": 0.0, + "tps_mean": 13.95, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -8791,8 +11265,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8803,8 +11277,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 395.28, - "tps_std": 0.22, + "tps_mean": 413.72, + "tps_std": 0.86, "error": false, "error_type": null, "backend": "ROCm", @@ -8816,8 +11290,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8828,7 +11302,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 3.96, + "tps_mean": 4.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -8841,8 +11315,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8853,8 +11327,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 468.37, - "tps_std": 1.54, + "tps_mean": 469.46, + "tps_std": 1.37, "error": false, "error_type": null, "backend": "ROCm", @@ -8866,8 +11340,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8878,7 +11352,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 4.08, + "tps_mean": 4.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -8891,8 +11365,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8903,8 +11377,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 79.42, - "tps_std": 0.41, + "tps_mean": 84.71, + "tps_std": 8.12, "error": false, "error_type": null, "backend": "ROCm", @@ -8916,8 +11390,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8928,7 +11402,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 3.97, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -8941,8 +11415,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8953,8 +11427,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 89.19, - "tps_std": 0.53, + "tps_mean": 84.12, + "tps_std": 9.82, "error": false, "error_type": null, "backend": "ROCm", @@ -8966,8 +11440,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -8978,7 +11452,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.06, + "tps_mean": 4.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -8991,8 +11465,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9003,307 +11477,7 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 398.35, - "tps_std": 1.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 302.82, - "tps_std": 2.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 59.13, - "tps_std": 7.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 61.26, - "tps_std": 10.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 418.46, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 466.83, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 454.1, + "tps_mean": 408.4, "tps_std": 1.09, "error": false, "error_type": null, @@ -9314,10 +11488,510 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 470.49, + "tps_std": 1.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 84.93, + "tps_std": 8.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 83.22, + "tps_std": 10.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 412.86, + "tps_std": 1.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 465.55, + "tps_std": 1.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 453.66, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 498.77, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 419.05, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 4.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 466.36, + "tps_std": 1.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 451.57, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9341,8 +12015,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9353,8 +12027,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 499.43, - "tps_std": 1.24, + "tps_mean": 499.87, + "tps_std": 0.47, "error": false, "error_type": null, "backend": "ROCm", @@ -9366,8 +12040,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9378,7 +12052,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.06, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -9391,8 +12065,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9403,8 +12077,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 392.5, - "tps_std": 0.5, + "tps_mean": 422.0, + "tps_std": 0.56, "error": false, "error_type": null, "backend": "ROCm", @@ -9416,8 +12090,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9428,7 +12102,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 3.97, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -9441,8 +12115,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9453,8 +12127,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 311.25, - "tps_std": 0.72, + "tps_mean": 469.45, + "tps_std": 1.83, "error": false, "error_type": null, "backend": "ROCm", @@ -9466,8 +12140,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9478,7 +12152,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 4.09, + "tps_mean": 4.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -9491,8 +12165,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9503,8 +12177,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 451.69, - "tps_std": 0.62, + "tps_mean": 453.24, + "tps_std": 0.64, "error": false, "error_type": null, "backend": "ROCm", @@ -9516,8 +12190,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9541,8 +12215,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9553,8 +12227,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 324.43, - "tps_std": 0.22, + "tps_mean": 502.26, + "tps_std": 0.81, "error": false, "error_type": null, "backend": "ROCm", @@ -9566,8 +12240,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9578,7 +12252,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.09, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -9591,8 +12265,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9647,8 +12321,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 129.49, - "tps_std": 0.34, + "tps_mean": 134.52, + "tps_std": 0.99, "error": false, "error_type": null, "backend": "Vulkan", @@ -9660,8 +12334,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9672,7 +12346,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 4.06, + "tps_mean": 3.92, "tps_std": 0.0, "error": false, "error_type": null, @@ -9685,8 +12359,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9697,8 +12371,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 137.67, - "tps_std": 1.25, + "tps_mean": 138.59, + "tps_std": 1.23, "error": false, "error_type": null, "backend": "Vulkan", @@ -9710,8 +12384,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9722,7 +12396,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 4.06, + "tps_mean": 3.93, "tps_std": 0.0, "error": false, "error_type": null, @@ -9735,8 +12409,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9747,8 +12421,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 2033.46, - "tps_std": 5.16, + "tps_mean": 1886.62, + "tps_std": 6.81, "error": false, "error_type": null, "backend": "ROCm", @@ -9760,8 +12434,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9772,8 +12446,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 76.47, - "tps_std": 0.26, + "tps_mean": 76.36, + "tps_std": 5.1, "error": false, "error_type": null, "backend": "ROCm", @@ -9785,8 +12459,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9797,8 +12471,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 2276.86, - "tps_std": 9.6, + "tps_mean": 2282.08, + "tps_std": 7.86, "error": false, "error_type": null, "backend": "ROCm", @@ -9810,8 +12484,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9822,8 +12496,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 70.76, - "tps_std": 0.26, + "tps_mean": 72.4, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -9835,8 +12509,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9847,8 +12521,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 727.18, - "tps_std": 2.22, + "tps_mean": 713.12, + "tps_std": 38.25, "error": false, "error_type": null, "backend": "ROCm", @@ -9860,8 +12534,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9872,8 +12546,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 75.65, - "tps_std": 0.74, + "tps_mean": 67.01, + "tps_std": 5.06, "error": false, "error_type": null, "backend": "ROCm", @@ -9885,8 +12559,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9897,8 +12571,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 740.27, - "tps_std": 10.38, + "tps_mean": 676.8, + "tps_std": 75.42, "error": false, "error_type": null, "backend": "ROCm", @@ -9910,8 +12584,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9922,8 +12596,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 70.76, - "tps_std": 0.11, + "tps_mean": 59.1, + "tps_std": 2.57, "error": false, "error_type": null, "backend": "ROCm", @@ -9935,8 +12609,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9947,8 +12621,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 2035.38, - "tps_std": 4.03, + "tps_mean": 1857.54, + "tps_std": 7.32, "error": false, "error_type": null, "backend": "ROCm", @@ -9960,8 +12634,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9972,8 +12646,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 75.4, - "tps_std": 0.8, + "tps_mean": 75.34, + "tps_std": 7.91, "error": false, "error_type": null, "backend": "ROCm", @@ -9985,8 +12659,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -9997,8 +12671,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1515.55, - "tps_std": 8.1, + "tps_mean": 2214.91, + "tps_std": 7.2, "error": false, "error_type": null, "backend": "ROCm", @@ -10010,8 +12684,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10022,307 +12696,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 70.2, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 714.75, - "tps_std": 27.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 66.1, - "tps_std": 5.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 596.86, - "tps_std": 37.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 58.75, - "tps_std": 3.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 2014.6, - "tps_std": 24.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 59.16, - "tps_std": 3.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 2191.77, - "tps_std": 78.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 54.32, - "tps_std": 2.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1991.71, - "tps_std": 2.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 56.37, - "tps_std": 3.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2096.22, - "tps_std": 4.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 64.88, + "tps_mean": 72.36, "tps_std": 0.05, "error": false, "error_type": null, @@ -10333,10 +12707,510 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 656.82, + "tps_std": 60.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 63.81, + "tps_std": 3.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 663.36, + "tps_std": 79.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 58.63, + "tps_std": 2.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 1830.34, + "tps_std": 15.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 60.04, + "tps_std": 4.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 2178.17, + "tps_std": 91.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 55.78, + "tps_std": 3.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 2016.93, + "tps_std": 4.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 58.29, + "tps_std": 3.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2094.58, + "tps_std": 12.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 66.23, + "tps_std": 8.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1865.95, + "tps_std": 7.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 79.56, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 2252.6, + "tps_std": 11.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 72.57, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 2011.51, + "tps_std": 6.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 79.65, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2111.65, + "tps_std": 7.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 67.62, + "tps_std": 4.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10347,8 +13221,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 2027.41, - "tps_std": 4.62, + "tps_mean": 1869.83, + "tps_std": 5.67, "error": false, "error_type": null, "backend": "ROCm", @@ -10360,8 +13234,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10372,8 +13246,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 77.12, - "tps_std": 0.03, + "tps_mean": 79.48, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -10385,8 +13259,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10397,8 +13271,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1550.55, - "tps_std": 4.52, + "tps_mean": 2229.43, + "tps_std": 7.33, "error": false, "error_type": null, "backend": "ROCm", @@ -10410,8 +13284,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10422,57 +13296,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 70.54, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1992.48, - "tps_std": 7.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 77.05, + "tps_mean": 72.58, "tps_std": 0.03, "error": false, "error_type": null, @@ -10483,10 +13307,60 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 2014.48, + "tps_std": 4.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 79.61, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10497,8 +13371,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1474.15, - "tps_std": 1.44, + "tps_mean": 2064.91, + "tps_std": 7.11, "error": false, "error_type": null, "backend": "ROCm", @@ -10510,8 +13384,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10522,8 +13396,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 70.44, - "tps_std": 0.01, + "tps_mean": 72.45, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -10535,8 +13409,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10547,8 +13421,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1593.62, - "tps_std": 2.9, + "tps_mean": 1288.81, + "tps_std": 206.13, "error": false, "error_type": null, "backend": "Vulkan", @@ -10560,8 +13434,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10572,8 +13446,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 85.26, - "tps_std": 0.26, + "tps_mean": 86.61, + "tps_std": 1.74, "error": false, "error_type": null, "backend": "Vulkan", @@ -10585,8 +13459,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10597,8 +13471,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 936.52, - "tps_std": 2.35, + "tps_mean": 1149.64, + "tps_std": 181.24, "error": false, "error_type": null, "backend": "Vulkan", @@ -10610,8 +13484,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10622,8 +13496,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 60.89, - "tps_std": 0.1, + "tps_mean": 85.5, + "tps_std": 1.74, "error": false, "error_type": null, "backend": "Vulkan", @@ -10635,8 +13509,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10647,8 +13521,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1515.05, - "tps_std": 2.98, + "tps_mean": 967.51, + "tps_std": 123.3, "error": false, "error_type": null, "backend": "Vulkan", @@ -10660,8 +13534,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10672,8 +13546,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 87.54, - "tps_std": 0.18, + "tps_mean": 86.74, + "tps_std": 1.45, "error": false, "error_type": null, "backend": "Vulkan", @@ -10685,8 +13559,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10697,8 +13571,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1476.16, - "tps_std": 5.12, + "tps_mean": 991.94, + "tps_std": 120.98, "error": false, "error_type": null, "backend": "Vulkan", @@ -10710,8 +13584,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10722,8 +13596,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 82.48, - "tps_std": 0.36, + "tps_mean": 85.61, + "tps_std": 0.84, "error": false, "error_type": null, "backend": "Vulkan", @@ -10735,8 +13609,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10747,8 +13621,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 622.16, - "tps_std": 6.71, + "tps_mean": 629.19, + "tps_std": 3.98, "error": false, "error_type": null, "backend": "ROCm", @@ -10760,8 +13634,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10772,7 +13646,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 33.91, + "tps_mean": 34.79, "tps_std": 0.01, "error": false, "error_type": null, @@ -10785,8 +13659,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10797,8 +13671,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 743.09, - "tps_std": 4.89, + "tps_mean": 780.88, + "tps_std": 9.39, "error": false, "error_type": null, "backend": "ROCm", @@ -10810,8 +13684,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10822,7 +13696,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 33.76, + "tps_mean": 34.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -10835,8 +13709,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10847,8 +13721,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 354.98, - "tps_std": 0.72, + "tps_mean": 364.08, + "tps_std": 1.11, "error": false, "error_type": null, "backend": "ROCm", @@ -10860,8 +13734,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -10872,783 +13746,980 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", + "tps_mean": 34.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 400.84, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 34.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 627.57, + "tps_std": 4.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 34.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 783.4, + "tps_std": 1.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 34.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", + "build": null + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 402.16, + "tps_std": 1.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 34.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 650.02, + "tps_std": 4.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 34.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 778.25, + "tps_std": 3.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 34.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 688.7, + "tps_std": 7.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 34.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 780.39, + "tps_std": 6.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 34.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 653.89, + "tps_std": 3.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 34.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 770.19, + "tps_std": 5.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 34.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 691.27, + "tps_std": 4.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 34.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 772.44, + "tps_std": 6.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 34.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 653.09, + "tps_std": 7.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 34.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 779.77, + "tps_std": 4.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 34.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 683.9, + "tps_std": 5.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 34.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 777.37, + "tps_std": 4.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 34.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 626.37, + "tps_std": 2.34, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 35.23, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 723.8, + "tps_std": 2.49, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 34.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 401.61, + "tps_std": 1.76, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", "tps_mean": 33.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 390.67, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 33.79, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 617.0, - "tps_std": 4.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 543.39, - "tps_std": 5.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 354.18, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 33.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 322.46, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 33.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 643.61, - "tps_std": 7.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 33.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 736.33, - "tps_std": 3.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 33.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 651.63, - "tps_std": 3.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 33.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 738.84, - "tps_std": 9.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 33.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 649.28, - "tps_std": 0.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 550.01, - "tps_std": 3.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 659.79, - "tps_std": 3.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 553.65, - "tps_std": 2.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 33.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 449.86, - "tps_std": 1.68, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 34.19, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 496.21, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 230.09, - "tps_std": 0.83, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -11660,33 +14731,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.57, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11697,8 +14743,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 243.96, - "tps_std": 0.96, + "tps_mean": 444.61, + "tps_std": 1.65, "error": false, "error_type": null, "backend": "Vulkan", @@ -11710,8 +14756,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11722,35 +14768,60 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 33.79, + "tps_mean": 33.84, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 612.55, + "tps_std": 6.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 47.08, "tps_std": 0.01, "error": false, "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 606.86, - "tps_std": 5.18, - "error": false, - "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -11760,33 +14831,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 45.26, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11797,8 +14843,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 732.72, - "tps_std": 4.06, + "tps_mean": 766.08, + "tps_std": 2.67, "error": false, "error_type": null, "backend": "ROCm", @@ -11810,8 +14856,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11822,7 +14868,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 45.14, + "tps_mean": 45.93, "tps_std": 0.01, "error": false, "error_type": null, @@ -11835,8 +14881,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11847,8 +14893,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 351.42, - "tps_std": 1.56, + "tps_mean": 362.01, + "tps_std": 1.06, "error": false, "error_type": null, "backend": "ROCm", @@ -11860,8 +14906,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11872,8 +14918,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 45.39, - "tps_std": 0.01, + "tps_mean": 47.04, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -11885,8 +14931,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11919,8 +14965,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 608.2, - "tps_std": 7.04, + "tps_mean": 614.68, + "tps_std": 3.32, "error": false, "error_type": null, "backend": "ROCm", @@ -11932,8 +14978,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11944,7 +14990,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 45.4, + "tps_mean": 47.04, "tps_std": 0.01, "error": false, "error_type": null, @@ -11957,8 +15003,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11969,8 +15015,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 533.95, - "tps_std": 3.58, + "tps_mean": 768.28, + "tps_std": 5.81, "error": false, "error_type": null, "backend": "ROCm", @@ -11982,8 +15028,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -11994,307 +15040,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 44.41, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 352.53, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 45.41, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 320.78, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 44.49, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 635.84, - "tps_std": 5.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 45.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 708.36, - "tps_std": 12.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 44.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 650.68, - "tps_std": 9.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 45.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 734.35, - "tps_std": 10.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 44.85, + "tps_mean": 45.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -12305,10 +15051,510 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 362.06, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 397.06, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.01, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 639.82, + "tps_std": 2.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 46.29, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 756.98, + "tps_std": 1.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 46.0, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 683.94, + "tps_std": 2.89, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.29, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 778.15, + "tps_std": 4.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 641.91, + "tps_std": 7.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 47.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 756.17, + "tps_std": 4.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 46.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 681.37, + "tps_std": 3.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.19, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 770.6, + "tps_std": 3.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12319,8 +15565,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 646.07, - "tps_std": 6.86, + "tps_mean": 641.87, + "tps_std": 3.27, "error": false, "error_type": null, "backend": "ROCm", @@ -12332,8 +15578,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12344,7 +15590,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 45.5, + "tps_mean": 47.17, "tps_std": 0.01, "error": false, "error_type": null, @@ -12357,8 +15603,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12369,160 +15615,310 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 541.57, + "tps_mean": 757.39, + "tps_std": 3.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 46.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 688.94, + "tps_std": 3.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 769.31, + "tps_std": 5.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 46.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 682.6, + "tps_std": 3.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 51.41, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 790.49, + "tps_std": 4.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 50.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 426.15, + "tps_std": 2.65, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 52.79, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 478.69, "tps_std": 3.26, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 44.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 657.58, - "tps_std": 3.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 45.56, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 550.79, - "tps_std": 2.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 44.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 485.54, - "tps_std": 2.45, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -12530,135 +15926,10 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 49.29, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 540.81, - "tps_std": 2.56, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 48.25, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 239.24, - "tps_std": 1.27, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.39, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12668,9 +15939,9 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "test": "pp512", - "tps_mean": 255.5, - "tps_std": 1.49, + "test": "tg128", + "tps_mean": 52.75, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -12682,33 +15953,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 50.41, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12719,8 +15965,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1198.51, - "tps_std": 10.39, + "tps_mean": 1140.4, + "tps_std": 8.72, "error": false, "error_type": null, "backend": "ROCm", @@ -12732,8 +15978,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12744,7 +15990,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 27.14, + "tps_mean": 27.24, "tps_std": 0.0, "error": false, "error_type": null, @@ -12757,8 +16003,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12769,8 +16015,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1506.44, - "tps_std": 7.03, + "tps_mean": 1492.3, + "tps_std": 22.33, "error": false, "error_type": null, "backend": "ROCm", @@ -12782,8 +16028,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12794,8 +16040,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 27.1, - "tps_std": 0.01, + "tps_mean": 26.96, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12807,8 +16053,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12819,8 +16065,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 326.8, - "tps_std": 4.56, + "tps_mean": 327.64, + "tps_std": 1.89, "error": false, "error_type": null, "backend": "ROCm", @@ -12832,8 +16078,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12844,8 +16090,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.13, - "tps_std": 0.01, + "tps_mean": 27.19, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12857,8 +16103,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12869,8 +16115,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 350.18, - "tps_std": 5.1, + "tps_mean": 342.77, + "tps_std": 3.39, "error": false, "error_type": null, "backend": "ROCm", @@ -12882,8 +16128,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12894,7 +16140,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 27.09, + "tps_mean": 27.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -12907,8 +16153,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12919,8 +16165,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1185.57, - "tps_std": 6.55, + "tps_mean": 1147.38, + "tps_std": 6.4, "error": false, "error_type": null, "backend": "ROCm", @@ -12932,8 +16178,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12944,8 +16190,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 27.12, - "tps_std": 0.01, + "tps_mean": 27.24, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12957,8 +16203,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12969,8 +16215,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1000.77, - "tps_std": 2.37, + "tps_mean": 1508.59, + "tps_std": 26.99, "error": false, "error_type": null, "backend": "ROCm", @@ -12982,8 +16228,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -12994,7 +16240,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 26.83, + "tps_mean": 27.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -13007,8 +16253,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13019,8 +16265,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 322.0, - "tps_std": 4.37, + "tps_mean": 326.33, + "tps_std": 6.68, "error": false, "error_type": null, "backend": "ROCm", @@ -13032,8 +16278,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13044,8 +16290,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.14, - "tps_std": 0.01, + "tps_mean": 27.2, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13057,8 +16303,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13069,8 +16315,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 303.26, - "tps_std": 4.84, + "tps_mean": 344.41, + "tps_std": 7.32, "error": false, "error_type": null, "backend": "ROCm", @@ -13082,8 +16328,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13094,6 +16340,106 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", + "tps_mean": 26.96, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 1202.41, + "tps_std": 13.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 26.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 1484.6, + "tps_std": 5.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", "tps_mean": 26.9, "tps_std": 0.0, "error": false, @@ -13105,10 +16451,110 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1225.63, + "tps_std": 9.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 27.25, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1421.82, + "tps_std": 12.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 26.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13119,8 +16565,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1256.75, - "tps_std": 10.54, + "tps_mean": 1209.21, + "tps_std": 16.57, "error": false, "error_type": null, "backend": "ROCm", @@ -13132,8 +16578,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13144,7 +16590,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 27.11, + "tps_mean": 27.23, "tps_std": 0.0, "error": false, "error_type": null, @@ -13157,8 +16603,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13169,8 +16615,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1481.17, - "tps_std": 9.67, + "tps_mean": 1489.0, + "tps_std": 6.12, "error": false, "error_type": null, "backend": "ROCm", @@ -13182,8 +16628,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13194,7 +16640,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 27.03, + "tps_mean": 26.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -13207,8 +16653,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13219,8 +16665,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 1202.19, - "tps_std": 5.53, + "tps_mean": 1218.32, + "tps_std": 13.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13232,8 +16678,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13244,7 +16690,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.1, + "tps_mean": 27.21, "tps_std": 0.0, "error": false, "error_type": null, @@ -13257,8 +16703,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13269,8 +16715,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 1422.9, - "tps_std": 11.48, + "tps_mean": 1424.6, + "tps_std": 8.06, "error": false, "error_type": null, "backend": "ROCm", @@ -13282,8 +16728,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13294,7 +16740,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 27.04, + "tps_mean": 26.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -13307,8 +16753,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13319,8 +16765,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1253.01, - "tps_std": 23.2, + "tps_mean": 1198.99, + "tps_std": 21.23, "error": false, "error_type": null, "backend": "ROCm", @@ -13332,8 +16778,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13344,7 +16790,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 27.11, + "tps_mean": 27.25, "tps_std": 0.0, "error": false, "error_type": null, @@ -13357,8 +16803,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13369,8 +16815,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1005.24, - "tps_std": 32.45, + "tps_mean": 1506.46, + "tps_std": 15.83, "error": false, "error_type": null, "backend": "ROCm", @@ -13382,8 +16828,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13394,7 +16840,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 26.89, + "tps_mean": 26.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -13407,8 +16853,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13419,8 +16865,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 1220.02, - "tps_std": 12.3, + "tps_mean": 1224.83, + "tps_std": 11.58, "error": false, "error_type": null, "backend": "ROCm", @@ -13432,8 +16878,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13444,7 +16890,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.17, + "tps_mean": 27.2, "tps_std": 0.0, "error": false, "error_type": null, @@ -13457,8 +16903,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13469,8 +16915,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 985.58, - "tps_std": 10.64, + "tps_mean": 1446.22, + "tps_std": 17.28, "error": false, "error_type": null, "backend": "ROCm", @@ -13482,8 +16928,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13494,7 +16940,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 26.88, + "tps_mean": 26.99, "tps_std": 0.0, "error": false, "error_type": null, @@ -13507,8 +16953,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13519,8 +16965,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 367.61, - "tps_std": 1.9, + "tps_mean": 469.06, + "tps_std": 6.82, "error": false, "error_type": null, "backend": "Vulkan", @@ -13532,8 +16978,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13544,7 +16990,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 8.69, + "tps_mean": 15.25, "tps_std": 0.01, "error": false, "error_type": null, @@ -13557,8 +17003,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -13569,810 +17015,1010 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 386.12, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 8.66, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 315.56, - "tps_std": 1.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 7.86, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 333.31, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 7.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1184.03, - "tps_std": 8.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 65.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1480.28, - "tps_std": 9.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 64.45, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 584.04, - "tps_std": 2.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 64.87, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 643.25, - "tps_std": 3.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 64.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1171.02, - "tps_std": 7.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.94, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 996.31, - "tps_std": 6.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 63.68, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 582.51, - "tps_std": 2.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 64.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 522.63, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 63.66, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1236.64, - "tps_std": 11.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 64.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1460.58, - "tps_std": 11.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 64.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1299.34, - "tps_std": 7.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 64.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1516.33, - "tps_std": 21.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 64.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1246.14, - "tps_std": 8.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 65.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "de219279", - "number": "6181" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1010.38, + "tps_mean": 514.17, "tps_std": 6.35, "error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 15.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 537.62, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 583.32, + "tps_std": 3.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.86, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1164.39, + "tps_std": 11.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 67.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1531.44, + "tps_std": 9.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 65.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 590.66, + "tps_std": 1.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 67.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 653.8, + "tps_std": 1.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 65.72, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1160.12, + "tps_std": 12.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 67.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1539.79, + "tps_std": 14.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 65.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 591.28, + "tps_std": 2.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 67.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 655.1, + "tps_std": 1.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_3-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 65.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 1222.12, + "tps_std": 10.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 67.34, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 1515.09, + "tps_std": 6.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 65.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1335.14, + "tps_std": 17.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 67.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1562.66, + "tps_std": 9.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 65.84, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1215.59, + "tps_std": 8.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 67.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1521.41, + "tps_std": 10.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 65.89, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1334.89, + "tps_std": 9.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 67.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1567.58, + "tps_std": 12.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 65.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1219.34, + "tps_std": 5.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 67.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1530.7, + "tps_std": 9.71, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -14382,8 +18028,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14394,8 +18040,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 63.49, - "tps_std": 0.01, + "tps_mean": 65.84, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -14407,8 +18053,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14419,8 +18065,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 1303.74, - "tps_std": 6.94, + "tps_mean": 1331.0, + "tps_std": 21.19, "error": false, "error_type": null, "backend": "ROCm", @@ -14432,8 +18078,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14444,7 +18090,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 65.1, + "tps_mean": 67.41, "tps_std": 0.01, "error": false, "error_type": null, @@ -14457,8 +18103,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14469,8 +18115,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1037.92, - "tps_std": 11.67, + "tps_mean": 1575.63, + "tps_std": 16.6, "error": false, "error_type": null, "backend": "ROCm", @@ -14482,8 +18128,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14494,7 +18140,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 63.63, + "tps_mean": 65.76, "tps_std": 0.01, "error": false, "error_type": null, @@ -14507,8 +18153,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14519,8 +18165,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1220.69, - "tps_std": 8.95, + "tps_mean": 1498.39, + "tps_std": 12.53, "error": false, "error_type": null, "backend": "Vulkan", @@ -14532,8 +18178,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14544,57 +18190,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 71.42, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1467.61, - "tps_std": 12.7, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 69.47, + "tps_mean": 74.08, "tps_std": 0.09, "error": false, "error_type": null, @@ -14605,10 +18201,60 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1914.72, + "tps_std": 22.77, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 72.57, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14619,8 +18265,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 651.21, - "tps_std": 5.24, + "tps_mean": 1002.66, + "tps_std": 7.71, "error": false, "error_type": null, "backend": "Vulkan", @@ -14632,8 +18278,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14644,8 +18290,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 72.35, - "tps_std": 0.08, + "tps_mean": 74.77, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "Vulkan", @@ -14657,8 +18303,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14669,8 +18315,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 732.35, - "tps_std": 7.51, + "tps_mean": 1204.49, + "tps_std": 13.52, "error": false, "error_type": null, "backend": "Vulkan", @@ -14682,8 +18328,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14694,8 +18340,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 72.05, - "tps_std": 0.07, + "tps_mean": 74.94, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "Vulkan", @@ -14707,8 +18353,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14719,8 +18365,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 981.76, - "tps_std": 1.61, + "tps_mean": 979.46, + "tps_std": 1.57, "error": false, "error_type": null, "backend": "ROCm", @@ -14732,8 +18378,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14744,8 +18390,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 49.26, - "tps_std": 0.0, + "tps_mean": 49.9, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -14757,8 +18403,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14769,8 +18415,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1096.97, - "tps_std": 5.09, + "tps_mean": 1100.15, + "tps_std": 1.95, "error": false, "error_type": null, "backend": "ROCm", @@ -14782,8 +18428,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14794,107 +18440,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 48.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 348.0, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 367.46, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "1fe00296", - "number": "6182" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 48.2, + "tps_mean": 49.29, "tps_std": 0.01, "error": false, "error_type": null, @@ -14905,21 +18451,71 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3", + "env": "rocm6_4_3-rocwmma-hblt0", "env_base": "rocm6_4_3", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 978.3, + "tps_mean": 348.31, + "tps_std": 0.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 366.05, "tps_std": 1.98, "error": false, "error_type": null, @@ -14930,10 +18526,60 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3-rocwmma-hblt0", + "env_base": "rocm6_4_3", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 49.32, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_3", + "env_base": "rocm6_4_3", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 979.46, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14944,8 +18590,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 49.39, - "tps_std": 0.0, + "tps_mean": 49.9, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -14957,8 +18603,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14969,8 +18615,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 686.88, - "tps_std": 0.38, + "tps_mean": 1105.32, + "tps_std": 2.28, "error": false, "error_type": null, "backend": "ROCm", @@ -14982,8 +18628,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -14994,8 +18640,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 48.8, - "tps_std": 0.0, + "tps_mean": 49.24, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15007,8 +18653,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15019,8 +18665,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 348.07, - "tps_std": 0.5, + "tps_mean": 348.63, + "tps_std": 0.64, "error": false, "error_type": null, "backend": "ROCm", @@ -15032,8 +18678,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15044,7 +18690,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 49.36, + "tps_mean": 49.81, "tps_std": 0.0, "error": false, "error_type": null, @@ -15057,8 +18703,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15069,8 +18715,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 307.39, - "tps_std": 0.7, + "tps_mean": 368.28, + "tps_std": 0.81, "error": false, "error_type": null, "backend": "ROCm", @@ -15082,8 +18728,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15094,8 +18740,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 48.75, - "tps_std": 0.0, + "tps_mean": 49.35, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15107,8 +18753,208 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "pp512", + "tps_mean": 977.63, + "tps_std": 2.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": false, + "test": "tg128", + "tps_mean": 49.91, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "pp512", + "tps_mean": 1097.55, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants", + "fa": true, + "test": "tg128", + "tps_mean": 49.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 860.3, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 49.89, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 953.79, + "tps_std": 3.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-fa_all_quants-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 49.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", + "build": { + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15119,8 +18965,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 978.15, - "tps_std": 1.18, + "tps_mean": 984.61, + "tps_std": 2.65, "error": false, "error_type": null, "backend": "ROCm", @@ -15132,8 +18978,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15144,8 +18990,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 49.15, - "tps_std": 0.0, + "tps_mean": 49.94, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15157,8 +19003,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15169,8 +19015,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1089.54, - "tps_std": 1.93, + "tps_mean": 1095.5, + "tps_std": 2.69, "error": false, "error_type": null, "backend": "ROCm", @@ -15182,8 +19028,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15194,7 +19040,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 46.47, + "tps_mean": 49.34, "tps_std": 0.01, "error": false, "error_type": null, @@ -15207,8 +19053,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15219,8 +19065,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 863.15, - "tps_std": 2.9, + "tps_mean": 859.46, + "tps_std": 1.91, "error": false, "error_type": null, "backend": "ROCm", @@ -15232,8 +19078,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15244,7 +19090,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 49.09, + "tps_mean": 49.9, "tps_std": 0.0, "error": false, "error_type": null, @@ -15257,8 +19103,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15269,8 +19115,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 947.88, - "tps_std": 1.69, + "tps_mean": 952.18, + "tps_std": 1.68, "error": false, "error_type": null, "backend": "ROCm", @@ -15282,8 +19128,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15294,7 +19140,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 46.48, + "tps_mean": 49.32, "tps_std": 0.01, "error": false, "error_type": null, @@ -15307,8 +19153,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15319,8 +19165,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 979.59, - "tps_std": 2.44, + "tps_mean": 980.24, + "tps_std": 1.4, "error": false, "error_type": null, "backend": "ROCm", @@ -15332,8 +19178,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15344,7 +19190,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 49.38, + "tps_mean": 49.9, "tps_std": 0.0, "error": false, "error_type": null, @@ -15357,8 +19203,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15369,8 +19215,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 684.81, - "tps_std": 1.14, + "tps_mean": 1100.05, + "tps_std": 4.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15382,8 +19228,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15394,7 +19240,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 48.97, + "tps_mean": 49.29, "tps_std": 0.01, "error": false, "error_type": null, @@ -15407,8 +19253,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15419,8 +19265,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 865.92, - "tps_std": 1.53, + "tps_mean": 860.23, + "tps_std": 0.94, "error": false, "error_type": null, "backend": "ROCm", @@ -15432,8 +19278,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15444,8 +19290,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 49.4, - "tps_std": 0.0, + "tps_mean": 49.92, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15457,8 +19303,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15469,8 +19315,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 630.67, - "tps_std": 1.16, + "tps_mean": 958.47, + "tps_std": 2.31, "error": false, "error_type": null, "backend": "ROCm", @@ -15482,8 +19328,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15494,7 +19340,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 48.83, + "tps_mean": 49.29, "tps_std": 0.01, "error": false, "error_type": null, @@ -15507,8 +19353,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "de219279", - "number": "6181" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15519,8 +19365,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1305.67, - "tps_std": 1.36, + "tps_mean": 1317.02, + "tps_std": 4.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -15532,8 +19378,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15544,8 +19390,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 48.48, - "tps_std": 0.11, + "tps_mean": 53.59, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", @@ -15557,8 +19403,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15569,8 +19415,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1377.39, - "tps_std": 0.62, + "tps_mean": 1380.42, + "tps_std": 7.77, "error": false, "error_type": null, "backend": "Vulkan", @@ -15582,8 +19428,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15594,8 +19440,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 47.91, - "tps_std": 0.01, + "tps_mean": 52.95, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", @@ -15607,8 +19453,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15619,8 +19465,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 875.74, - "tps_std": 6.47, + "tps_mean": 868.7, + "tps_std": 8.94, "error": false, "error_type": null, "backend": "Vulkan", @@ -15632,8 +19478,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15644,8 +19490,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 52.85, - "tps_std": 0.12, + "tps_mean": 54.37, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -15657,8 +19503,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15669,8 +19515,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 957.61, - "tps_std": 5.26, + "tps_mean": 957.23, + "tps_std": 9.23, "error": false, "error_type": null, "backend": "Vulkan", @@ -15682,8 +19528,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } }, { @@ -15694,8 +19540,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 52.16, - "tps_std": 0.08, + "tps_mean": 53.49, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -15707,8 +19553,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "1fe00296", - "number": "6182" + "hash": "f1fbffb5", + "number": "6486" } } ]