From 67fb3a002b2343bcbad49c7a05e353ceb3448efb Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Sat, 15 Nov 2025 08:36:25 +0000 Subject: [PATCH] Updated benchmarks --- benchmark/delete_transient_failures.py | 141 + benchmark/generate_results.json.py | 44 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...ha-rocwmma-improved__fa1__longctx32768.log | 9 + ...01-of-00002__rocm-7alpha-rocwmma__fa1.log} | 10 +- ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 9 + ..._K_XL-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 6 + ...K_XL-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 9 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...r-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log | 15 - ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 9 + ...L-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 9 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...D-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log | 10 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 10 + ...XL-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...2__rocm7_rc-rocwmma__fa1__longctx32768.log | 9 + ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 10 - ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ...7_rc-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...ir-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 10 - ...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...-of-00002__rocm7_rc__fa1__longctx32768.log | 6 + ...4_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 10 - ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 +- ...2__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ...03__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00003__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 9 + ..._K_XL-00001-of-00003__rocm-7alpha__fa1.log | 10 + ...-00003__rocm-7alpha__fa1__longctx32768.log | 10 + ...K_XL-00001-of-00003__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00003__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 6 + ...f-00003__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...r-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log | 15 - ...Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 9 + ...L-00001-of-00003__rocm7.1-rocwmma__fa1.log | 10 + ...03__rocm7.1-rocwmma__fa1__longctx32768.log | 9 + ...-of-00003__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...D-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log | 10 + ...1-of-00003__rocm7.1__fa1__longctx32768.log | 6 + ...XL-00001-of-00003__rocm7.1__hblt0__fa1.log | 10 + ...003__rocm7.1__hblt0__fa1__longctx32768.log | 6 + ..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 10 - ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 10 - ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ...7_rc-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log | 10 +- ...-of-00003__rocm7_rc__fa1__longctx32768.log | 6 + ...6_K_XL-00001-of-00003__rocm7_rc__hblt0.log | 10 - ...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 10 +- ...03__rocm7_rc__hblt0__fa1__longctx32768.log | 9 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...003__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...00003__vulkan_radv__fa1__longctx32768.log} | 10 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00002__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ..._K_XL-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 10 + ...K_XL-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 9 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...t-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log | 15 - ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 6 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...L-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...D-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log | 10 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 10 + ...XL-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 10 - ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...2__rocm7_rc-rocwmma__fa1__longctx32768.log | 9 + ...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 10 - ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...of-00002__rocm7_rc__fa1__longctx32768.log} | 10 +- ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 +- ...02__rocm7_rc__hblt0__fa1__longctx32768.log | 6 + ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 4 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...ha-rocwmma-improved__fa1__longctx32768.log | 9 + ...01-of-00002__rocm-7alpha-rocwmma__fa1.log} | 10 +- ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-Q6_K-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 9 + ...Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 8 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...nstruct-Q6_K-00001-of-00002__rocm6_4_4.log | 15 - ...ct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 6 + ...K-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...ruct-Q6_K-00001-of-00002__rocm7.1__fa1.log | 6 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 9 + ..._K-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...uct-Q6_K-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...of-00002__rocm7_rc__fa1__longctx32768.log} | 10 +- ...K-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +- ...02__rocm7_rc__hblt0__fa1__longctx32768.log | 10 + ...6_K-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...-Q6_K-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ...03__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00003__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-Q8_0-00001-of-00003__rocm-7alpha__fa1.log | 10 + ...-00003__rocm-7alpha__fa1__longctx32768.log | 9 + ...Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00003__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 9 + ...f-00003__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...nstruct-Q8_0-00001-of-00003__rocm6_4_4.log | 15 - ...ct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 6 + ...0-00001-of-00003__rocm7.1-rocwmma__fa1.log | 10 + ...03__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00003__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...ruct-Q8_0-00001-of-00003__rocm7.1__fa1.log | 10 + ...1-of-00003__rocm7.1__fa1__longctx32768.log | 6 + ..._0-00001-of-00003__rocm7.1__hblt0__fa1.log | 10 + ...003__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 10 +- ...3__rocm7_rc-rocwmma__fa1__longctx32768.log | 9 + ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 10 - ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...uct-Q8_0-00001-of-00003__rocm7_rc__fa1.log | 10 +- ...of-00003__rocm7_rc__fa1__longctx32768.log} | 10 +- ...0-00001-of-00003__rocm7_rc__hblt0__fa1.log | 8 +- ...3__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...8_0-00001-of-00003__vulkan_amdvlk__fa1.log | 4 +- ...003__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...-Q8_0-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...00003__vulkan_radv__fa1__longctx32768.log} | 10 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...ha-rocwmma-improved__fa1__longctx32768.log | 10 + ...001-of-00002__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ..._K_XL-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 7 + ...K_XL-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...001-of-00002__rocm6_4_4-rocwmma__hblt0.log | 6 - ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...t-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log | 15 - ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...L-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...D-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log | 10 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 10 + ...XL-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...ct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 10 - ...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...of-00002__rocm7_rc__fa1__longctx32768.log} | 10 +- ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 +- ...2__rocm7_rc__hblt0__fa1__longctx32768.log} | 4 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ...03__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00003__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 9 + ..._K_XL-00001-of-00003__rocm-7alpha__fa1.log | 10 + ...-00003__rocm-7alpha__fa1__longctx32768.log | 10 + ...K_XL-00001-of-00003__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00003__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 9 + ...f-00003__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...7-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log | 15 - ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...L-00001-of-00003__rocm7.1-rocwmma__fa1.log | 10 + ...03__rocm7.1-rocwmma__fa1__longctx32768.log | 9 + ...-of-00003__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...D-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log | 10 + ...1-of-00003__rocm7.1__fa1__longctx32768.log | 10 + ...XL-00001-of-00003__rocm7.1__hblt0__fa1.log | 10 + ...003__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 10 - ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 10 - ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ...7_rc-rocwmma__hblt0__fa1__longctx32768.log | 9 + ...-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log | 10 +- ...of-00003__rocm7_rc__fa1__longctx32768.log} | 10 +- ...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 10 +- ...03__rocm7_rc__hblt0__fa1__longctx32768.log | 7 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...003__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...00003__vulkan_radv__fa1__longctx32768.log} | 10 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00002__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-BF16-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 10 + ...BF16-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...30B-A3B-BF16-00001-of-00002__rocm6_4_4.log | 15 - ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...6-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...-A3B-BF16-00001-of-00002__rocm7.1__fa1.log | 10 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 10 + ...16-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...A3B-BF16-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...of-00002__rocm7_rc__fa1__longctx32768.log} | 10 +- ...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 +- ...2__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ...XL__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...7-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...ruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log | 10 + ...6_K_XL__rocm-7alpha__fa1__longctx32768.log | 10 + ...uct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log | 15 - ...507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log | 15 - ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 10 +- ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 10 + ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 10 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log | 10 + ...XL__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...D-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log | 10 + ...UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log | 10 + ...t-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log | 10 + ..._XL__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...nstruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log | 10 +- ...-Q6_K_XL__rocm7_rc__fa1__longctx32768.log} | 10 +- ...-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log | 10 +- ...L__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 6 +- ..._XL__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 6 +- ..._K_XL__vulkan_radv__fa1__longctx32768.log} | 10 +- ..._M__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...truct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log | 10 + ...Q4_K_M__rocm-7alpha__fa1__longctx32768.log | 10 + ...A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log | 15 - ...nstruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...der-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log | 15 - ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 10 +- ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 10 + ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 10 +- ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log | 10 + ..._M__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...ct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log | 10 + ...uct-Q4_K_M__rocm7.1__fa1__longctx32768.log | 10 + ...B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log | 10 + ...K_M__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...t-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log | 10 +- ...t-Q4_K_M__rocm7_rc__fa1__longctx32768.log} | 10 +- ...-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log | 10 +- ...M__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 6 +- ...K_M__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 6 +- ...4_K_M__vulkan_radv__fa1__longctx32768.log} | 10 +- ...XL__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...t-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log | 10 + ...8_K_XL__rocm-7alpha__fa1__longctx32768.log | 10 + ...3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log | 15 - ...-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log | 15 - ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 10 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 10 + ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 10 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...2b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log | 10 + ...XL__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...D-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...emma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log | 10 + ...UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log | 10 + ...12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log | 10 + ..._XL__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...mma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log | 10 +- ...-Q8_K_XL__rocm7_rc__fa1__longctx32768.log} | 10 +- ...2b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log | 10 +- ...L__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 6 +- ..._XL__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 6 +- ..._K_XL__vulkan_radv__fa1__longctx32768.log} | 10 +- ...02__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00002__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...-BF16-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...-00002__rocm-7alpha__fa1__longctx32768.log | 10 + ...BF16-00001-of-00002__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...-27b-it-BF16-00001-of-00002__rocm6_4_4.log | 15 - ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...6-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...02__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...b-it-BF16-00001-of-00002__rocm7.1__fa1.log | 10 + ...1-of-00002__rocm7.1__fa1__longctx32768.log | 10 + ...16-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...002__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...-it-BF16-00001-of-00002__rocm7_rc__fa1.log | 10 +- ...of-00002__rocm7_rc__fa1__longctx32768.log} | 10 +- ...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 +- ...2__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...002__vulkan_amdvlk__fa1__longctx32768.log} | 6 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...00002__vulkan_radv__fa1__longctx32768.log} | 10 +- ..._S__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log | 10 + ...Q3_K_S__rocm-7alpha__fa1__longctx32768.log | 10 + ...emma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log | 15 - ...3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../gemma-3-4b-it-Q3_K_S__rocm6_4_4.log | 15 - .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 10 +- ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 10 + ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 10 +- ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...a-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log | 10 + ..._S__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log | 10 + ...-it-Q3_K_S__rocm7.1__fa1__longctx32768.log | 10 + ...ma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log | 10 + ...K_S__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...t-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- .../gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log | 10 +- ...t-Q3_K_S__rocm7_rc__fa1__longctx32768.log} | 10 +- ...a-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log | 10 +- ...S__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 6 +- ...K_S__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 6 +- ...3_K_S__vulkan_radv__fa1__longctx32768.log} | 10 +- ...16__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...oss-120b-F16__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + .../gpt-oss-120b-F16__rocm-7alpha__fa1.log | 10 + ...0b-F16__rocm-7alpha__fa1__longctx32768.log | 10 + .../gpt-oss-120b-F16__rocm6_4_4-rocwmma.log | 15 - ...t-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...20b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../results/gpt-oss-120b-F16__rocm6_4_4.log | 15 - .../gpt-oss-120b-F16__rocm6_4_4__fa1.log | 10 +- ...120b-F16__rocm6_4_4__fa1__longctx32768.log | 10 + ...pt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log | 10 +- ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log | 10 + ...16__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../gpt-oss-120b-F16__rocm7.1__fa1.log | 10 + ...s-120b-F16__rocm7.1__fa1__longctx32768.log | 10 + .../gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log | 10 + ...F16__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...pt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-120b-F16__rocm7_rc__fa1.log | 8 +- ...120b-F16__rocm7_rc__fa1__longctx32768.log} | 10 +- ...gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log | 10 +- ...6__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-120b-F16__vulkan_amdvlk__fa1.log | 6 +- ...F16__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- .../gpt-oss-120b-F16__vulkan_radv__fa1.log | 6 +- ...b-F16__vulkan_radv__fa1__longctx32768.log} | 10 +- ...03__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...001-of-00003__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + ...mxfp4-00001-of-00003__rocm-7alpha__fa1.log | 10 + ...-00003__rocm-7alpha__fa1__longctx32768.log | 10 + ...xfp4-00001-of-00003__rocm6_4_4-rocwmma.log | 15 - ...00001-of-00003__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...f-00003__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...s-120b-mxfp4-00001-of-00003__rocm6_4_4.log | 15 - ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...4-00001-of-00003__rocm7.1-rocwmma__fa1.log | 10 + ...03__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...-of-00003__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + ...20b-mxfp4-00001-of-00003__rocm7.1__fa1.log | 10 + ...1-of-00003__rocm7.1__fa1__longctx32768.log | 10 + ...p4-00001-of-00003__rocm7.1__hblt0__fa1.log | 10 + ...003__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- ...0b-mxfp4-00001-of-00003__rocm7_rc__fa1.log | 10 +- ...of-00003__rocm7_rc__fa1__longctx32768.log} | 10 +- ...4-00001-of-00003__rocm7_rc__hblt0__fa1.log | 10 +- ...3__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...003__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...00003__vulkan_radv__fa1__longctx32768.log} | 10 +- ...32__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + .../gpt-oss-20b-F32__rocm-7alpha__fa1.log | 10 + ...0b-F32__rocm-7alpha__fa1__longctx32768.log | 10 + .../gpt-oss-20b-F32__rocm6_4_4-rocwmma.log | 15 - ...pt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../results/gpt-oss-20b-F32__rocm6_4_4.log | 15 - .../gpt-oss-20b-F32__rocm6_4_4__fa1.log | 10 +- ...-20b-F32__rocm6_4_4__fa1__longctx32768.log | 10 + ...gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + .../gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log | 10 + ...32__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...s-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../results/gpt-oss-20b-F32__rocm7.1__fa1.log | 10 + ...ss-20b-F32__rocm7.1__fa1__longctx32768.log | 10 + .../gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log | 10 + ...F32__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-F32__rocm7_rc__fa1.log | 10 +- ...-20b-F32__rocm7_rc__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log | 10 +- ...2__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-F32__vulkan_amdvlk__fa1.log | 6 +- ...F32__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-F32__vulkan_radv__fa1.log | 6 +- ...b-F32__vulkan_radv__fa1__longctx32768.log} | 10 +- ...p4__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...ss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log | 10 + ...rocm-7alpha-rocwmma__fa1__longctx32768.log | 10 + .../gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log | 10 + ...-mxfp4__rocm-7alpha__fa1__longctx32768.log | 10 + .../gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log | 15 - ...-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../results/gpt-oss-20b-mxfp4__rocm6_4_4.log | 15 - .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 10 +- ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 10 + ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 10 +- ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + ...pt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log | 10 + ...p4__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../gpt-oss-20b-mxfp4__rocm7.1__fa1.log | 10 + ...-20b-mxfp4__rocm7.1__fa1__longctx32768.log | 10 + ...gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log | 10 + ...fp4__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...t-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...0b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-mxfp4__rocm7_rc__fa1.log | 10 +- ...0b-mxfp4__rocm7_rc__fa1__longctx32768.log} | 10 +- ...pt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log | 10 +- ...4__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 6 +- ...fp4__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 6 +- ...mxfp4__vulkan_radv__fa1__longctx32768.log} | 10 +- ..._0__rocm-7alpha-rocwmma-improved__fa1.log} | 10 +- ...a-rocwmma-improved__fa1__longctx32768.log} | 10 +- ...a-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log} | 10 +- ...ocm-7alpha-rocwmma__fa1__longctx32768.log} | 10 +- .../llama-2-7b.Q4_0__rocm-7alpha__fa1.log | 10 + ...b.Q4_0__rocm-7alpha__fa1__longctx32768.log | 10 + ...lama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log | 10 +- ...__rocm6_4_4-rocwmma__fa1__longctx32768.log | 10 + ...7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 +- ..._4_4-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 10 +- ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 10 + ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 10 +- ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 + .../llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log | 10 + ..._0__rocm7.1-rocwmma__fa1__longctx32768.log | 10 + ...2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...m7.1-rocwmma__hblt0__fa1__longctx32768.log | 10 + .../results/llama-2-7b.Q4_0__rocm7.1__fa1.log | 10 + ...-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log | 10 + .../llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log | 10 + ...4_0__rocm7.1__hblt0__fa1__longctx32768.log | 10 + ...llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log | 10 +- ...__rocm7_rc-rocwmma__fa1__longctx32768.log} | 10 +- ...-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log | 10 +- ..._rc-rocwmma__hblt0__fa1__longctx32768.log} | 10 +- .../llama-2-7b.Q4_0__rocm7_rc__fa1.log | 10 +- ...-7b.Q4_0__rocm7_rc__fa1__longctx32768.log} | 10 +- .../llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log | 10 +- ...0__rocm7_rc__hblt0__fa1__longctx32768.log} | 10 +- .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 6 +- ...4_0__vulkan_amdvlk__fa1__longctx32768.log} | 10 +- .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 6 +- ....Q4_0__vulkan_radv__fa1__longctx32768.log} | 10 +- docs/assets/index2.css | 503 + docs/assets/index2.js | 687 + docs/index.html | 797 +- docs/results.json | 39691 +++++++++++----- 664 files changed, 32551 insertions(+), 15807 deletions(-) create mode 100644 benchmark/delete_transient_failures.py rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log} (56%) create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log} (56%) create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log} (55%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (55%) delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log} (55%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log} (51%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (59%) rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (59%) rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log => Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log => Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log => Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log => Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log => gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log => gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log => gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log => gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log => gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log => gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log => gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log => gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log => gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log => gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log => gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log => gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log => gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log => gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log => gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log} (72%) rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log => gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log => gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log => gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm7_rc.log => gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log => gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log => gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log => gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log => gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gemma-3-4b-it-Q3_K_S__vulkan_radv.log => gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log => gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gpt-oss-120b-F16__rocm6_4_4__hblt0.log => gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gpt-oss-120b-F16__rocm7_rc.log => gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-F16__rocm7_rc__hblt0.log => gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-F16__rocm7_rc-rocwmma.log => gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log => gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-F16__vulkan_amdvlk.log => gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-120b-F16__vulkan_radv.log => gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log => gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log => gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log => gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gpt-oss-20b-F32__rocm6_4_4__hblt0.log => gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gpt-oss-20b-F32__rocm7_rc.log => gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-F32__rocm7_rc__hblt0.log => gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-F32__rocm7_rc-rocwmma.log => gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log => gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-F32__vulkan_amdvlk.log => gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-20b-F32__vulkan_radv.log => gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log => gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log => gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log delete mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_rc.log => gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log => gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log => gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log => gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{gpt-oss-20b-mxfp4__vulkan_amdvlk.log => gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{gpt-oss-20b-mxfp4__vulkan_radv.log => gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log} (60%) rename benchmark/results/{llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log => llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log => llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm6_4_4.log => llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm6_4_4__hblt0.log => llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log} (56%) create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log rename benchmark/results/{llama-2-7b.Q4_0__rocm7_rc-rocwmma.log => llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log => llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm7_rc.log => llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__rocm7_rc__hblt0.log => llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log} (56%) rename benchmark/results/{llama-2-7b.Q4_0__vulkan_amdvlk.log => llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log} (60%) rename benchmark/results/{llama-2-7b.Q4_0__vulkan_radv.log => llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log} (60%) create mode 100644 docs/assets/index2.css create mode 100644 docs/assets/index2.js diff --git a/benchmark/delete_transient_failures.py b/benchmark/delete_transient_failures.py new file mode 100644 index 0000000..aa72778 --- /dev/null +++ b/benchmark/delete_transient_failures.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +import argparse +import glob +import os +import re + +RESULTS_DIR_DEFAULT = "results" + +# Same detection logic as your extractor +HEADER_RE = re.compile(r"^\|\s*model\s*\|", re.IGNORECASE) +SEP_RE = re.compile(r"^\|\s*-+") + +LOAD_ERR = re.compile(r"failed to load model|Device memory allocation.*failed|⚠️\s*Fail", re.IGNORECASE) +HANG_ERR = re.compile(r"GPU Hang|HW Exception", re.IGNORECASE) +GENERIC_ERR = re.compile(r"error:|exit \d+|runtime error|⚠️\s*Runtime Error", re.IGNORECASE) + + +def parse_table(text): + lines = text.splitlines() + rows = [] + header = None + col_idx = {} + + for line in lines: + if HEADER_RE.search(line): + header = [c.strip().lower() for c in line.strip().strip("|").split("|")] + for idx, name in enumerate(header): + col_idx[name] = idx + continue + + if header and (SEP_RE.search(line) or not line.strip()): + continue + + if header and line.startswith("|"): + parts = [c.strip() for c in line.strip().strip("|").split("|")] + if len(parts) < len(header): + continue + row = {} + for name, idx in col_idx.items(): + row[name] = parts[idx] + rows.append(row) + + if header and line.strip() == "" and rows: + break + + return rows + + +def detect_error(text): + if LOAD_ERR.search(text): + return True + if HANG_ERR.search(text): + return True + if GENERIC_ERR.search(text): + return True + return False + + +def is_non_transient_vram_issue(text): + # Do NOT delete logs with this kind of Vulkan OOM + return ( + "ggml_vulkan: Device memory allocation of size" in text + and "Requested buffer size exceeds device buffer size limit" in text + ) + + +def is_failed_run(text): + table_rows = parse_table(text) + + has_pp = any(r.get("test", "").lower() == "pp512" for r in table_rows) + has_tg = any(r.get("test", "").lower() == "tg128" for r in table_rows) + + if has_pp or has_tg: + return False + + return detect_error(text) + + +def main(): + ap = argparse.ArgumentParser( + description="Delete transient-failure benchmark logs in results/" + ) + ap.add_argument( + "--results-dir", + default=RESULTS_DIR_DEFAULT, + help="Directory containing *.log files (default: results)", + ) + ap.add_argument( + "--dry-run", + action="store_true", + help="Only print what would be deleted", + ) + args = ap.parse_args() + + results_dir = args.results_dir + pattern = os.path.join(results_dir, "*.log") + + to_delete = [] + skipped_non_transient = [] + + for path in sorted(glob.glob(pattern)): + try: + with open(path, errors="ignore") as f: + text = f.read() + except OSError as e: + print(f"Could not read {path}: {e}") + continue + + if not is_failed_run(text): + continue + + if is_non_transient_vram_issue(text): + skipped_non_transient.append(path) + continue + + to_delete.append(path) + + if not to_delete and not skipped_non_transient: + print("No failed logs found.") + return + + if skipped_non_transient: + print("Keeping logs with non transient VRAM issues:") + for p in skipped_non_transient: + print(f" KEEP {p}") + + if to_delete: + print("Deleting logs with transient failures:") + for p in to_delete: + print(f" DELETE {p}") + if not args.dry_run: + try: + os.remove(p) + except OSError as e: + print(f" Failed to delete {p}: {e}") + else: + print("No logs to delete.") + + +if __name__ == "__main__": + main() diff --git a/benchmark/generate_results.json.py b/benchmark/generate_results.json.py index c01279c..1f7061b 100644 --- a/benchmark/generate_results.json.py +++ b/benchmark/generate_results.json.py @@ -34,26 +34,45 @@ NAME_B_RE = re.compile(r"(\d+(?:\.\d+)?)B") # Shard suffix in filenames SHARD_RE = re.compile(r"-000\d+-of-000\d+", re.IGNORECASE) +# Long-context suffix in filenames (e.g., __longctx32768) +LONGCTX_RE = re.compile(r"longctx(\d+)", re.IGNORECASE) + # --- Helpers --------------------------------------------------------------- def clean_model_name(raw): base = SHARD_RE.sub("", raw) return base -def parse_env_and_fa(basename): - # pattern: __[__fa1][__hblt0] +def parse_env_flags(basename): + """ + pattern: __[__fa1][__hblt0][__longctx32768] + Returns (env, fa, context_tag, context_tokens) + """ parts = basename.split("__") if len(parts) < 2: - return None, False + return None, False, "default", None env = parts[1] - # scan any extra suffix segments - suffixes = {p.lower() for p in parts[2:]} - fa = ("fa1" in suffixes) - if "hblt0" in suffixes: - env = f"{env}-hblt0" + fa = False + context_tag = "default" + context_tokens = None - return env, fa + for raw_suffix in parts[2:]: + suffix = raw_suffix.lower() + if suffix == "fa1": + fa = True + elif suffix == "hblt0": + env = f"{env}-hblt0" + elif suffix.startswith("longctx"): + context_tag = suffix + m = LONGCTX_RE.search(suffix) + if m: + try: + context_tokens = int(m.group(1)) + except ValueError: + context_tokens = None + + return env, fa, context_tag, context_tokens def env_base_and_variant(env): # e.g. "rocm6_4_2-rocwmma" -> ("rocm6_4_2", "rocwmma") @@ -135,8 +154,9 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))): continue model_raw, _rest = base.split("__", 1) - env, fa_from_name = parse_env_and_fa(base) - envs.add(env) + env, fa_from_name, context_tag, context_tokens = parse_env_flags(base) + if env: + envs.add(env) model_clean = clean_model_name(model_raw) @@ -215,6 +235,8 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))): "env_base": env_base, "env_variant": env_variant, # e.g. "rocwmma" "fa": bool(fa_enabled), + "context": context_tag or "default", + "context_tokens": context_tokens, "test": test, # "pp512" | "tg128" | None (if error) "tps_mean": tps_mean, "tps_std": tps_std, diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index cc456bd..0bad5ba 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 135.87 ± 0.06 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.27 ± 0.47 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.61 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log new file mode 100644 index 0000000..67a4a8a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.09 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0xe277840) reason :GPU Hang +✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index 92bb7f3..5b54fa8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.81 ± 0.78 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.41 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 87.62 ± 0.29 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.57 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d7905a1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.99 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3b2ac4f0) reason :GPU Hang +✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..eef29dc --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 93.53 ± 0.08 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.66 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..fbab93a --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2cfb74f0) on address 0x7fb6c65be000. Reason: Page not present or supervisor privilege. +✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index ec80d4d..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.16 ± 0.25 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index d16366a..7206046 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.20 ± 0.28 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.10 ± 0.15 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d4885f1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.41 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3c42f5c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index cc7c79d..da2e733 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.53 ± 0.45 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.08 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.39 ± 0.25 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.02 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4116dc0 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.30 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1feee5c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index bfe97bd..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.30 ± 0.14 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.83 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 15cb662..989d06a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 136.00 ± 0.15 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.93 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.50 ± 0.06 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.19 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..32cbd2d --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 30.21 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.28 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index a8e57a0..44a30df 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 132.96 ± 0.49 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.99 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.61 ± 0.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.78 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8e28f5f --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.91 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1d99e5c0) reason :GPU Hang +✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..b971d30 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 77.55 ± 0.60 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..86798dc --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.62 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x34ae58c0) reason :GPU Hang +✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..99de90f --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.50 ± 0.48 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..47de304 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.23 ± 0.00 | +Memory access fault by GPU node-1 (Agent handle: 0x15b288c0) on address 0x7f4ebb38d000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..6263f58 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.42 ± 0.19 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..e644886 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.83 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..922b095 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.08 ± 0.59 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9854413 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.69 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 523ba45..159357c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 67.08 ± 0.15 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.07 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 72.38 ± 0.12 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..43e10b1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.40 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1678a6f0) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 9b3654e..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.71 ± 0.66 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 7cccce1..e11d2bb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.21 ± 0.25 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 131.85 ± 0.23 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.04 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e87dc1b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.15 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x106c26f0) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 1dbc576..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 119.33 ± 0.28 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.19 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index f8a3d29..2fe705c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 93.03 ± 0.12 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.00 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.25 ± 0.11 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.54 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log new file mode 100644 index 0000000..3dccbf9 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x3c42b6f0) on address 0x7f0a849aa000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log deleted file mode 100644 index 2e935e0..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 136.81 ± 0.43 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index fd97866..364824c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.95 ± 0.73 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 169.93 ± 0.33 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log index 6522117..6e8957c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 102.61 ± 0.20 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.54 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 3d6be96..47eed24 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 218.68 ± 0.54 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.27 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 217.91 ± 0.48 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.50 ± 0.06 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index b1887c5..1311ae3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 216.84 ± 0.52 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.15 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.82 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.79 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index a336437..5601ee8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 236.02 ± 2.60 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.51 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 235.07 ± 0.58 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.84 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 0dda835..4eb5776 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 223.39 ± 1.25 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.06 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.43 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.49 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log index b1e22c6..8263fe5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 133.48 ± 0.45 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.11 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 92.38 ± 0.37 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.64 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 59c0892..0fc4ce7 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 140.68 ± 0.66 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.84 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.45 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.86 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..a71c62c --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 86.50 ± 0.17 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.61 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..15a1325 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.06 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x97894f0) reason :GPU Hang +✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log new file mode 100644 index 0000000..70e2488 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 74.73 ± 0.27 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.66 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..3d49e46 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.97 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.58 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log deleted file mode 100644 index 1d82208..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 118.41 ± 0.20 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.75 ± 0.16 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 232c71a..16c69fb 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.51 ± 0.51 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.16 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 125.43 ± 0.26 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.48 ± 0.14 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..bc468da --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x3a41b5c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index e9db21c..a489899 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 143.55 ± 0.54 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.17 ± 0.06 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 140.41 ± 0.79 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.52 ± 0.05 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7e4d816 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.20 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x2b2915c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log deleted file mode 100644 index dbef949..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.76 ± 0.14 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.69 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log index a56945a..4d8bb2e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.77 ± 0.12 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.14 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.63 ± 0.70 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..08a0928 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.75 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.10 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 94b1ac0..2a333e8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.52 ± 0.53 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.08 ± 0.21 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.79 ± 0.11 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.57 ± 0.06 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6fab8e3 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x14c455c0) reason :GPU Hang +✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..5dd5559 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 69.31 ± 0.07 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.50 ± 0.09 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..7c0958d --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.07 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x231278c0) reason :GPU Hang +✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f95e8b9 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 136.65 ± 0.08 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.46 ± 0.16 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5bb8ae4 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.05 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3b6528c0) reason :GPU Hang +✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log new file mode 100644 index 0000000..93400e6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 94.32 ± 0.20 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.25 ± 0.53 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..3ba3a60 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x3cdc48c0) on address 0x7f1399b6e000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..2543587 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.72 ± 0.76 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.54 ± 0.04 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..da835a1 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x123888c0) reason :GPU Hang +✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log deleted file mode 100644 index 5d0df46..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 97.09 ± 0.15 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 599ea21..df308dc 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 69.91 ± 0.44 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.11 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 114.56 ± 0.18 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.58 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log index 3dfe042..e1e7b9a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 91.95 ± 0.23 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.80 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.46 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.65 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 067a1fa..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.74 ± 0.30 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.20 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 3c4b809..53cfcab 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 133.32 ± 0.82 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.20 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 159.14 ± 0.64 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.20 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6c2b70e --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.46 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x2566c6f0) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log index bcb231a..09b5d9e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 125.81 ± 0.29 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.20 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 96.45 ± 0.26 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.51 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log new file mode 100644 index 0000000..7e0757d --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x19b4f6f0) on address 0x7f5ea34ff000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log deleted file mode 100644 index 820ccad..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.12 ± 0.60 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index f1daee6..8b61cc2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.48 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.17 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.86 ± 0.36 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.53 ± 0.06 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ca57bca --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.62 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x2d1506f0) reason :GPU Hang +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index a35925a..658f23d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 262.18 ± 1.19 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.30 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 261.54 ± 1.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.42 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 134e65b..cd48585 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 260.51 ± 1.03 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.26 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 23.19 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.25 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index 7a14496..df5cc5b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.21 ± 6.28 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.65 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.87 ± 0.08 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.74 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index f587e64..9384aac 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 222.31 ± 0.71 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.43 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.31 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.00 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index eec5bf4..7ce85ee 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.82 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.85 ± 0.10 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index b9a373c..8742dc5 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.12 ± 0.10 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.88 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..b4d3bb4 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.65 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..896060c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.76 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.80 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..1412b4c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.64 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..73c9fac --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.13 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.31 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index 87b4648..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.00 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.01 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 3c54596..f70644b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.65 ± 0.17 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.56 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e8d7c87 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.74 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1ed645c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 296b7e3..e920862 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.51 ± 0.05 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.23 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..46d4498 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.69 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x147425c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index 5e0d92a..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.85 ± 0.06 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.76 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index b0dab6c..e90399b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.47 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.28 ± 0.10 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..dbc6872 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2dba35c0) reason :GPU Hang +✖ ! [rocm6_4_4] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 6fb13dc..4e15bb4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.23 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.32 ± 0.29 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f1154a4 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.35 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.40 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..bfc8cc3 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.80 ± 0.11 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..56dac28 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.25 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..73d833d --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.92 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..94875a6 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.47 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..0b55222 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.28 ± 0.10 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..38507b6 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.15 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.41 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..def3e03 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.30 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..165a9b2 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 35.54 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log deleted file mode 100644 index 871f10a..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 92.92 ± 8.60 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 718f2ba..a9b07ac 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.05 ± 0.09 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.21 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..57f14ae --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.05 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x13295700) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 46fe5df..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.96 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index c447119..6afed62 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.51 ± 0.21 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 96.88 ± 0.10 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 7f147c8..f12bba5 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 95.55 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.08 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index 985b743..f4be44f 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.25 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.95 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log index 7f54c86..10b0e83 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.72 ± 0.13 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.03 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.43 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 75949af..f91c6b2 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.17 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 97.99 ± 0.15 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3496552 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2c2c1700) on address 0x7fb65e7a9000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 8b37fdb..92ee7d9 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 98.46 ± 0.54 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 98.55 ± 0.11 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 1cf5184..9a19b40 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 97.22 ± 0.38 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.81 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.09 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.12 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index a378333..6df56ae 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 86.06 ± 1.83 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 86.56 ± 0.57 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 1a4f0e1..937c694 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 83.82 ± 1.56 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.40 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.36 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index 3052d56..5e208e9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 278.22 ± 1.12 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.67 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 263.94 ± 2.74 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.18 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log new file mode 100644 index 0000000..6caba6b --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 155.11 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3ede5840) reason :GPU Hang +✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index e6885ae..70dc86e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.43 ± 0.75 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.53 ± 2.86 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.03 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..968b5d4 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.57 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.85 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..267824f --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.71 ± 1.81 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.19 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..6a249ea --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.24 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x143664f0) reason :GPU Hang +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index 694a8ca..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.81 ± 10.73 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.68 ± 0.05 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 525ed6d..d59b4c3 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 282.95 ± 5.18 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.77 ± 0.06 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 289.25 ± 1.39 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6dafb67 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 102.08 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.97 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 49eed32..a640b5e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x16ddf160) on address 0x7f64d1356000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.12 ± 0.61 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4d1e5f9 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.11 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1603e5c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index 857b288..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 269.91 ± 1.51 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.50 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log index 8195028..a75b786 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 288.79 ± 1.53 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.79 ± 0.05 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 290.60 ± 0.66 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.04 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..3eb4ab4 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.09 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.17 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 52bc536..effe5f2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x12e56b10) reason :GPU Hang -✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.38 ± 1.41 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.18 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d874dfa --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x2e6405c0) reason :GPU Hang +✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..bf1c76d --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 293.23 ± 0.81 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..06beff3 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.37 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.22 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5dd6ccc --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.19 ± 1.41 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7347e66 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.25 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1daad8d0) reason :GPU Hang +✖ ! [rocm7.1-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..1268260 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +HW Exception by GPU node-1 (Agent handle: 0x320d58b0) reason :GPU Hang +✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..e3d8bfe --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.62 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x38db28d0) reason :GPU Hang +✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..a2eb5aa --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 294.05 ± 2.13 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..79ba6ac --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.32 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.96 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 139eda7..51a2109 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 288.92 ± 3.51 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.81 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.59 ± 1.05 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log index df2e258..3e83b8d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 271.87 ± 1.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 126.89 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.26 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index cdcef38..6dd5a2d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.69 ± 1.15 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.82 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.84 ± 2.59 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 3aa3ab0..b16c16b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.82 ± 1.43 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.66 ± 0.07 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.46 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.33 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log index 7865e05..5a01db9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.90 ± 1.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.78 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.02 ± 1.98 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.13 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log index 45c12aa..03f663c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.26 ± 1.54 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.66 ± 0.08 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.12 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.64 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log index 26c6647..a54a3f8 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x292a92a0) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 282.70 ± 0.57 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.15 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..951edb7 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.42 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.16 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log index 7b878a2..4f33060 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 224.57 ± 3.64 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.76 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 224.36 ± 2.08 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.75 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 1c4d9fe..70c1772 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 224.42 ± 3.09 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.99 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 84.26 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.85 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log index a07e37d..73410e7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 212.38 ± 2.39 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.76 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 211.78 ± 1.53 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.73 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 8d69d90..0e48ea6 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 206.64 ± 2.56 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.81 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 85.49 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log index b54f84b..729dcff 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 260.53 ± 23.26 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.82 ± 0.11 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 272.42 ± 2.18 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 5a7c43c..c010215 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.00 ± 2.10 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.90 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 149.67 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.51 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..e475de4 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.57 ± 2.62 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.18 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..5040a52 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.17 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.86 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log new file mode 100644 index 0000000..fd71fc8 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 274.27 ± 3.87 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..aa2db73 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 190.45 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x145324f0) reason :GPU Hang +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log deleted file mode 100644 index cb99f32..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.00 ± 1.39 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.89 ± 0.03 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index edb356d..36c42f9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.68 ± 3.72 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.96 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.39 ± 0.35 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..06a66cd --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.80 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x37c3a5c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 18e96ab..9635772 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 295.09 ± 2.26 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.98 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.81 ± 2.22 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3a6f4d1 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.06 ± 0.00 | +Memory access fault by GPU node-1 (Agent handle: 0x12fb55c0) on address 0x7f33259aa000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log deleted file mode 100644 index 5cc9682..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.63 ± 1.32 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.87 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log index 9d334f2..5f23bac 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.51 ± 2.06 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.99 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.53 ± 3.47 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.12 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..66a18ab --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.57 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.36 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log index e30f07d..9d9395a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.35 ± 11.21 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.94 ± 0.13 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 298.30 ± 1.55 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..450f683 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x3dac65c0) on address 0x7f27e4f66000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..b691060 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.26 ± 1.05 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..68d0fd5 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 124.66 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.76 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..514eace --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.62 ± 1.21 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9ce7211 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 124.35 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.68 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log new file mode 100644 index 0000000..b8117ce --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.33 ± 1.49 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..e76e685 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x17e3c8d0) on address 0x7f09927ea000. Reason: Page not present or supervisor privilege. +✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..960b7b2 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.32 ± 1.14 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3fdb828 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.17 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.77 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 82f48de..1dd56b4 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 296.04 ± 2.16 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.98 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 291.43 ± 1.53 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..514a2f1 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.05 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3beb7700) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 86c1b70..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 266.07 ± 22.72 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index d94673e..2c41a32 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 257.00 ± 4.08 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.76 ± 0.51 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.94 ± 1.41 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 8882272..0dfe4b0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 263.12 ± 18.31 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.89 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.25 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.36 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log index c9f896e..3ed1431 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 284.19 ± 24.64 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.20 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 268.04 ± 46.82 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.14 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log index eeccd01..8d2c005 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.01 ± 1.64 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.85 ± 0.11 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.94 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.70 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log index 1f98864..c54c53f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xe25e2a0) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.00 ± 2.42 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log index 2db2881..c15c72a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 272.39 ± 2.15 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.54 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.69 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log index 8945f44..49022c7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 346.93 ± 1.50 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 349.58 ± 2.09 | | llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index b75fe91..ea674f2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 346.53 ± 1.71 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.57 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 99.90 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log index 3a955b7..e10b91a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 280.38 ± 1.48 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.58 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 280.28 ± 1.95 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.57 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log index eccb0f0..00f64ac 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 272.53 ± 1.82 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.58 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.42 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.49 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index d029a1f..e8bb713 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 295.23 ± 0.70 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.81 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 313.68 ± 2.67 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.49 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log new file mode 100644 index 0000000..fc63c97 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 154.48 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.04 ± 0.00 | + +build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..331c05f --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 311.89 ± 2.25 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.49 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..7f13842 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.25 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.06 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..0d891ea --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 314.61 ± 2.38 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.52 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..9f7e902 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,7 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x2b16a4f0) on address 0x7fc434965000. Reason: Page not present or supervisor privilege. +:0:rocdevice.cpp :3588: 7137878235 us: Callback: Queue 0x7fc544400000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 +✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index e7881a2..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.37 ± 1.53 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.81 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index bf8c9d0..ce9ca8c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 305.77 ± 1.56 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.87 ± 1.54 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.54 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..b5c4247 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 101.87 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.83 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log deleted file mode 100644 index 28cf6b2..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0xcb3c160) on address 0x7fdd3957e000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 857fd84..3cb0a3e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.78 ± 2.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 306.69 ± 2.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..67162a9 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.29 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.72 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index e974bfc..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.54 ± 1.17 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.59 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index c30ddd6..5205dc6 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 310.82 ± 2.23 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.50 ± 4.59 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.57 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..92bc9c9 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.52 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index a67e3ce..560e0ce 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.09 ± 1.64 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.24 ± 0.27 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.59 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..098adc5 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 217.84 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.97 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..cba7d0c --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 304.34 ± 2.51 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.61 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..ce38702 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.29 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..29e45ba --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 305.86 ± 2.98 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.55 ± 0.04 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bdbec90 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 134.32 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.62 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..041ef73 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 306.39 ± 1.58 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.59 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..6d5608d --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 198.60 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..8622337 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 309.00 ± 2.30 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.56 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..edb70d8 --- /dev/null +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.95 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.81 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 2712b2f..1d0bb41 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 305.36 ± 1.78 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 305.09 ± 1.49 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 55% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log index d7a9f39..092d4d5 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.71 ± 1.24 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.78 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.13 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.96 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 02766ec..4e5ac5a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.01 ± 1.45 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.67 ± 0.91 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.60 ± 0.02 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 55% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 538e156..01f0f7e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 281.11 ± 2.07 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.73 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.45 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log deleted file mode 100644 index 392d382..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 281.91 ± 2.33 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index e512cda..132dabf 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.77 ± 2.47 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 302.22 ± 1.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log similarity index 55% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log index c353f2d..ee97d41 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.75 ± 2.41 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.23 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.93 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 875df42..d4269f9 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.68 ± 1.75 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.93 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 300.96 ± 2.64 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.57 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 51% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log index 60d4cb4..77cdb17 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x36543290) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x1356700) reason :GPU Hang +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index d2d7b9c..716fe5b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 194.33 ± 1.56 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.64 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 195.52 ± 1.35 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.65 ± 0.04 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 59% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 9b30c60..41ac42b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 193.74 ± 0.96 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 21.05 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 81.66 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.06 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index c152d77..75cece6 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 228.13 ± 3.26 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.88 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 230.79 ± 1.84 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.79 ± 0.04 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 59% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index ab3e1cd..4f18e20 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 223.80 ± 2.70 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.91 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 89.56 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.73 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log index f5aeb10..ec0c47e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.45 ± 0.46 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.04 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 141.46 ± 1.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.01 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 7584081..cd33aff 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.50 ± 0.69 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.55 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.54 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.81 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..73ef692 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.18 ± 0.54 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.08 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d8bed5d --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 28.32 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0xc4754f0) reason :GPU Hang +✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log new file mode 100644 index 0000000..17446eb --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.03 ± 1.12 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.04 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..8ecc87e --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.64 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.44 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log deleted file mode 100644 index fcbf28b..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.74 ± 0.40 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.54 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 7a81d3e..6dda78b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.30 ± 0.82 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.90 ± 0.12 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.65 ± 1.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.07 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f4da2f8 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 24.32 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x33d785c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index dd67609..d576265 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.69 ± 0.47 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.95 ± 0.09 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.82 ± 1.43 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.11 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c90d084 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 24.00 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x252355c0) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log deleted file mode 100644 index b011d5b..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.25 ± 0.49 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.39 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 8b89d24..6a140e6 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.16 ± 0.77 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.87 ± 0.05 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.45 ± 0.58 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.88 ± 0.01 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c1231ef --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.05 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.63 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index e329c2d..aec88c2 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.26 ± 0.97 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.87 ± 0.08 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.30 ± 1.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.98 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1371602 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.23 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.36 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..2b82621 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.51 ± 1.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..f55ebec --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.99 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x169288d0) reason :GPU Hang +✖ ! [rocm7.1-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5cede6c --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.56 ± 0.30 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..82c98a6 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.82 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.99 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log new file mode 100644 index 0000000..eecc347 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.23 ± 0.15 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.00 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..5878978 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 39.93 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.45 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..53c4e7e --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.00 ± 0.62 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.97 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6c54dcb --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 40.14 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.09 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log deleted file mode 100644 index f29c25e..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.68 ± 0.50 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.62 ± 0.08 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 0bdbcf4..2e72a6e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.60 ± 0.56 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.90 ± 0.09 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.65 ± 0.59 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.20 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log index ae0c02f..9a2c46e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.39 ± 0.54 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.62 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.94 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.94 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log deleted file mode 100644 index 5b1eea3..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.60 ± 0.66 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.59 ± 0.06 | - -build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 200b3a6..019785a 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.15 ± 1.20 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.05 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.70 ± 0.16 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.19 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..92dfe4f --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,9 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 28.48 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x66cc700) reason :GPU Hang +✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log index 53e3d92..c9db7cc 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 146.88 ± 0.69 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.00 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.17 ± 0.07 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.03 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log index 9d4c6ed..2c848be 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.05 ± 0.64 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.64 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 39.50 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.24 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index bf7dc06..20fdc35 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.44 ± 0.78 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.99 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.77 ± 0.88 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.01 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1062bdd --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log @@ -0,0 +1,7 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x367b7700) on address 0x7f01ade91000. Reason: Page not present or supervisor privilege. +:0:rocdevice.cpp :3582: 3127764077 us: Callback: Queue 0x7f1ba1300000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 +✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 538d4bf..9cb68ed 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 135.43 ± 4.81 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.14 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 139.19 ± 0.25 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.45 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index ce992e9..cd77384 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 136.12 ± 1.49 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.32 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 16.94 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index 2c9d496..e5c8408 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 125.48 ± 4.53 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.02 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 128.55 ± 1.17 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.47 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index d4abf61..ee7cbf7 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 120.72 ± 3.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.74 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.82 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.88 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index 61f0eec..0a2e7d3 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 346.51 ± 4.73 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.49 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 478.10 ± 4.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.92 ± 0.01 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 5d594f4..42a9ab9 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 343.63 ± 2.43 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.52 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.19 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.06 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..30d4f60 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 483.01 ± 4.72 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.05 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..51c3480 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.27 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.44 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..94a2f44 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 482.27 ± 5.93 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.04 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..461c2aa --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.31 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index 6732114..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 436.29 ± 4.51 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 5fb9bdf..db05a01 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 480.95 ± 4.32 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.82 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 479.75 ± 5.18 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.90 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..07d269e --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.91 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.52 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 79b1058..ff5b133 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 356.62 ± 6.87 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.85 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 360.25 ± 7.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.84 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ee12796 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.25 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.50 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index 1b2bbe2..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.70 ± 6.51 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.23 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index c0c21f6..3ba2948 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 485.65 ± 7.08 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.71 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 493.29 ± 1.77 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.72 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c9857c4 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.98 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.97 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 63fd57e..89121d5 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 364.38 ± 4.78 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.83 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 362.53 ± 2.36 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.80 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f1ed352 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.87 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..be5c585 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 484.23 ± 1.92 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.12 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..cfb2c8a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.05 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..31947ad --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 357.45 ± 1.20 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.18 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..33f61ce --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.49 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.57 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..5ae522c --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 485.60 ± 4.06 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.98 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..a157427 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.51 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.95 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..5fdd5ef --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 360.51 ± 0.53 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.98 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6612bb7 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.18 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.94 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index ae8f244..65616ae 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 477.05 ± 5.97 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.84 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 472.32 ± 1.65 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.13 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log index 5e01b02..121506c 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 439.13 ± 4.42 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 129.44 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 32dc9c5..1e4b094 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 347.28 ± 5.57 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.86 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 348.39 ± 4.07 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.16 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 3cb901f..d1840b6 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 334.68 ± 2.49 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 125.66 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.60 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log index 8f60a35..b987962 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 482.81 ± 7.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.85 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 487.44 ± 3.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.97 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log index c7d6636..370b160 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.87 ± 4.36 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.56 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.86 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.04 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 788a85a..55406b9 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 346.28 ± 2.02 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.80 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 350.49 ± 4.28 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.05 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log index b5657e7..8c008da 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 338.71 ± 3.32 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.51 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.05 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.97 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index c030b4b..f94a365 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 216.46 ± 0.31 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.00 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 191.31 ± 0.05 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index c1d96cf..316a419 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 216.27 ± 0.39 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 10.07 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 53.97 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.47 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index a9da670..78a6ebf 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 166.05 ± 0.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.29 ± 0.02 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 165.85 ± 0.14 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.35 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 6fe4ca5..9270cda 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 163.35 ± 0.20 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 9.24 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.78 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.95 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log index cc78e33..26cf059 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 589.82 ± 5.37 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.38 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 573.35 ± 5.61 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.97 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 0b85b91..4e1681f 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 582.53 ± 3.05 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.41 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.75 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.78 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..c54f7f7 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 568.92 ± 3.37 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.40 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..ea8d8de --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.26 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.73 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log new file mode 100644 index 0000000..52ebd79 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 574.31 ± 5.95 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.21 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..ce9fa1f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.06 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.70 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log deleted file mode 100644 index 6c61f6d..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 562.46 ± 5.25 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.16 ± 0.01 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log index d40f50f..4c2afa0 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 626.72 ± 6.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.04 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 622.81 ± 3.95 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.81 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..27ab885 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.56 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.89 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 257a045..6293fb1 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 620.07 ± 8.69 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.88 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.44 ± 4.55 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.89 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4ad1908 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.66 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.92 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log deleted file mode 100644 index 98cada8..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 556.95 ± 4.88 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 54.93 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index e913334..c100d84 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.67 ± 5.74 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.81 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 640.29 ± 6.85 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 56.58 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..931a15a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.61 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.59 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index 48c1c1e..ad68852 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 629.19 ± 4.25 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.94 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 632.09 ± 4.14 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.49 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..937aa31 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.05 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.58 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..5e9f0dd --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 620.61 ± 2.27 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.85 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6a603bd --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.70 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.90 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..92621e0 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 617.37 ± 6.53 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.82 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4180a74 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.17 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.89 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log new file mode 100644 index 0000000..db13558 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 628.16 ± 1.90 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.33 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..4ebd080 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.01 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.22 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..8a80c84 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 628.24 ± 3.85 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.33 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0eae3fb --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 169.20 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.20 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log index d5596ea..1ec1c21 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 623.10 ± 4.22 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.95 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.38 ± 1.39 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.11 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log index f8fc311..475cf3b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 553.07 ± 3.87 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.65 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.88 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index d7e04e5..68c6582 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 619.92 ± 6.18 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.09 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 616.46 ± 1.40 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.69 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index bf0ad55..b26686d 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 588.10 ± 4.51 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.89 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.87 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log index b946331..542f5fc 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.51 ± 3.87 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.97 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.22 ± 5.42 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.35 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log index ecf9b98..ac04a7e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 557.13 ± 5.01 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.60 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 168.61 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.27 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log index 041dc76..0424b40 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 631.26 ± 5.65 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.97 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 626.37 ± 6.13 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.38 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log index 6caa371..6f76e6d 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 586.15 ± 3.69 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.36 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.25 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 8d5b733..6ae52c4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1020.41 ± 5.76 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 59.42 ± 0.04 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.46 ± 4.96 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.10 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log index c9faa6f..9c287b6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 1053.02 ± 7.31 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 63.84 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 69.70 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.21 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 13a53e9..b783151 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 857.47 ± 4.38 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.41 ± 0.08 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 853.23 ± 3.21 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.93 ± 0.05 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log index fbfb810..109e2cc 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 823.60 ± 3.91 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 64.74 ± 0.13 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.06 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 29.97 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log index a70acc8..e5dc74f 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 620.19 ± 1.89 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.33 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 673.50 ± 8.17 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 70.76 ± 0.02 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 1fc02dd..09931b2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 610.15 ± 6.46 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.15 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.86 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.65 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..3f45fdd --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 674.15 ± 10.24 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 71.14 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..95a5c67 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 150.94 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.92 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log new file mode 100644 index 0000000..83e92bc --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 675.10 ± 3.41 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 71.06 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..c72b204 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 161.39 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.57 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log deleted file mode 100644 index 65953d1..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 614.24 ± 6.08 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.41 ± 0.03 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log index 17c5aae..2559d41 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 662.07 ± 2.63 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.40 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 663.26 ± 2.04 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.79 ± 0.03 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..2eb5017 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.11 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.39 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log index 4aaf195..2dcdbc5 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 659.42 ± 4.75 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.39 ± 0.07 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 655.75 ± 5.39 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.70 ± 0.04 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..33683ff --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.45 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log deleted file mode 100644 index 731ac4f..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 594.00 ± 6.13 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.17 ± 0.02 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index 5a69a77..3723493 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 665.68 ± 5.62 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.36 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 674.37 ± 11.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 67.62 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..836433f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.29 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.04 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log index 54d202e..7fd03c1 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 662.57 ± 4.91 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.26 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 665.28 ± 7.13 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.57 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4ae34fe --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.17 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.97 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..584ee09 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 649.91 ± 5.41 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.03 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..836a7e9 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.49 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.48 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c3d0904 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 658.06 ± 8.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.11 ± 0.03 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9c6b809 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.60 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.50 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log new file mode 100644 index 0000000..07b2e4b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 662.81 ± 8.45 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.77 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..8e3c04e --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.05 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.58 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..972095e --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 660.13 ± 8.26 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.73 ± 0.03 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8abab36 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 169.50 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.56 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log index 6e66597..75c65e9 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 652.79 ± 5.72 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.70 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 650.55 ± 3.12 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.01 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log index 9ce0543..d0ef149 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 606.05 ± 4.70 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.71 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.66 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.46 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log index bbf7bdc..7e96a4d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 655.09 ± 6.42 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.71 ± 0.09 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 647.31 ± 2.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.01 ± 0.03 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 56df4ac..b0cb357 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 616.68 ± 3.21 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.75 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.47 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.52 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log index 90bb4a4..5795548 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 662.07 ± 3.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.49 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 660.75 ± 2.92 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.92 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log index e268e10..157a738 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 606.51 ± 6.65 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.58 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.44 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.63 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log index 8d3002f..77e0839 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 660.44 ± 3.81 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.74 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 662.51 ± 3.72 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.75 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log index e7f04a8..4ccb4fd 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 616.56 ± 6.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 66.64 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.10 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.53 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index fa7144b..f2ecb66 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 773.64 ± 3.79 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 77.82 ± 0.06 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 774.61 ± 2.12 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 81.31 ± 0.07 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log index d7e98cb..baf1c37 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 793.90 ± 3.33 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 83.96 ± 0.15 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 68.30 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index 0a9e287..a5ec37f 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 832.99 ± 3.06 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 81.40 ± 0.12 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 832.44 ± 3.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 87.24 ± 0.18 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log index fd02c27..528b0a6 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 800.63 ± 2.65 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 83.73 ± 0.11 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 100.73 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.30 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log index 6ed8490..c222b4c 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 781.56 ± 1.57 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 751.05 ± 61.73 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index a1c9d13..b2ce95e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 772.99 ± 2.45 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 323.48 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..457da89 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 795.35 ± 0.84 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..386e6b7 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 354.28 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.03 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log new file mode 100644 index 0000000..9d36ff9 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 816.42 ± 1.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.25 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..4fb65df --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.30 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log deleted file mode 100644 index 4a51b81..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 747.69 ± 1.06 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log index b7f2bb0..9318d52 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 809.20 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 811.49 ± 0.16 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.14 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..b3f919a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 151.46 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.18 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index e28a2d0..a3541b4 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 819.61 ± 0.83 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 819.41 ± 1.64 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0e73bbb --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 149.96 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.19 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log deleted file mode 100644 index 2cf28d1..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 720.89 ± 0.70 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 888ad2c..4a106ef 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 815.58 ± 1.47 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.08 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 826.24 ± 1.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.24 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..ad48dd5 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 267.36 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.56 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index e3c4756..b9b9282 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 824.58 ± 1.69 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 833.10 ± 1.71 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4bd97eb --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 252.65 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.56 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..8eba04b --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 797.32 ± 10.12 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..6aa5b8e --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 180.48 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..1c805a6 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 810.77 ± 1.82 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.18 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6883e75 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.10 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log new file mode 100644 index 0000000..97452bc --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 819.82 ± 2.06 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.25 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..2394aec --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 269.95 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..d5fe530 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 832.48 ± 2.03 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.26 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4159fec --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 266.41 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index c4a6a7a..7a42f38 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 799.43 ± 1.14 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 802.23 ± 0.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log index 7685df5..5d4eb49 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.16 ± 0.84 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.45 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.21 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 2dfa40d..9694972 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 788.46 ± 1.56 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.13 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 790.38 ± 0.48 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index f1f644a..817f7a7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 763.42 ± 1.37 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.65 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log index bb91b3b..0cfd222 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 816.86 ± 0.80 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.13 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 813.63 ± 8.49 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.03 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log index 1ca88e0..c8f08ef 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.90 ± 0.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 265.54 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log index d8b9701..5e8c571 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 806.70 ± 1.39 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.12 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 813.23 ± 0.99 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.26 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log index ed2ad92..8acf2bf 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 762.49 ± 1.04 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 252.49 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 2cbc33d..be25934 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 648.34 ± 0.61 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.52 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 590.41 ± 71.66 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.51 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 4af2fdc..25c302d 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 668.85 ± 1.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.64 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.29 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.88 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index db5ed22..19663a7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 532.11 ± 3.15 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.98 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 533.84 ± 0.83 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.99 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index cd18136..1e15198 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 541.39 ± 3.33 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.17 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 219.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.99 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log index a9a2107..971037c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 477.22 ± 0.76 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 439.12 ± 31.65 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 9e006da..a95bffd 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 471.17 ± 0.97 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 112.92 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..9557b2c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 428.59 ± 48.51 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..2f24b71 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 93.71 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.11 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..ea9b476 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 409.77 ± 60.03 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..4643958 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.30 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.57 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log deleted file mode 100644 index 09c19a8..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 420.14 ± 0.69 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 1a4d459..2e31fbe 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.87 ± 0.84 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 469.22 ± 0.41 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..0f58452 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 92.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 033670c..22e7612 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 524.62 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 524.21 ± 1.37 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e5b9e9f --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 93.57 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log deleted file mode 100644 index 21f076f..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.24 ± 0.72 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index 5e53f53..dab5c8e 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 471.95 ± 1.68 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 472.47 ± 0.58 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..1676f6f --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.47 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 95c2c68..0f8145c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 529.49 ± 1.20 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 530.73 ± 0.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..09322c1 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.11 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..a728a7e --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 463.62 ± 0.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..bc3fd22 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 113.46 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..96cab07 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 522.69 ± 0.87 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ef863b6 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.46 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..cc3c3bf --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 434.79 ± 46.53 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..71c3bde --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..355f439 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 524.39 ± 1.39 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2a48b32 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 195.60 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index b0606d8..687bbbd 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 464.58 ± 0.58 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 463.45 ± 0.58 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log index 76634c0..5824c19 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.95 ± 0.60 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 111.71 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.44 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index b2bf9fe..2d69680 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.11 ± 0.58 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 499.44 ± 0.09 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 2ff4d14..849f667 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 457.65 ± 0.59 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.59 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.44 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index 23fed27..6b32ee7 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.08 ± 0.27 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 470.06 ± 0.56 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log index 98c4607..bbcd196 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 421.40 ± 0.43 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 177.69 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 4664ca1..3d27335 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 503.26 ± 0.79 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 501.79 ± 0.45 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log index 98a3a10..0bcabf8 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 458.08 ± 0.81 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 198.39 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 72% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index cd92808..1586d96 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1__longctx32768 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 26c11fc..b0e18ba 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.33 ± 1.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.91 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 106.82 ± 1.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index fdd7c39..0fa299f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 103.58 ± 1.09 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.93 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 62.49 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.63 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log index fd554fe..431efbd 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2072.56 ± 8.20 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.03 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2224.91 ± 1.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 75.58 ± 9.31 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 1fabbb8..b15abfe 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2038.57 ± 4.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.42 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1239.19 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.92 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..ca2c81f --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2245.25 ± 4.85 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 75.82 ± 8.64 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..201e592 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1224.83 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.09 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log new file mode 100644 index 0000000..6eb525c --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2256.38 ± 8.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 84.67 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..7e3987d --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1206.03 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.51 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log deleted file mode 100644 index 1e894d0..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1935.04 ± 3.89 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.17 ± 0.01 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log index 3e8bfa0..4e866ac 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2278.78 ± 8.79 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 76.94 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2283.48 ± 2.94 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 78.74 ± 0.13 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..9d4bdf7 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 898.63 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.15 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log index 15697a2..1940700 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2158.84 ± 4.74 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.11 ± 0.04 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2154.45 ± 10.83 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 76.62 ± 3.81 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9744908 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 855.04 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.04 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log deleted file mode 100644 index 712d57c..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1896.32 ± 6.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.32 ± 0.08 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index d57318f..3f0a925 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2261.52 ± 12.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.18 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2276.80 ± 11.52 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 82.07 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..bfc5d28 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1497.53 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.57 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 02ce6c2..01c1a9e 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2127.98 ± 4.53 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2161.24 ± 6.51 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 82.35 ± 0.01 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3805856 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1440.10 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.38 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..57b3971 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2269.02 ± 4.71 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 59.93 ± 6.59 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e02d30a --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1031.65 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.77 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3020474 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2141.35 ± 2.64 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 64.63 ± 11.41 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..99cf514 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1002.59 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.91 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log new file mode 100644 index 0000000..fdf8139 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2261.65 ± 12.88 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 62.69 ± 7.43 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..d8a31db --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1160.50 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.25 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..ec3a243 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2171.00 ± 3.48 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 65.68 ± 10.35 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2c61af5 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1240.47 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.34 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log index 6cc66c6..617a0a7 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2249.97 ± 8.38 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.23 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2257.61 ± 5.08 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 78.84 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log index cceaf6f..38a595f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1865.48 ± 5.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.26 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1046.30 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.97 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log index 1615a42..ed4dea8 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2110.98 ± 11.29 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.03 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2102.34 ± 8.12 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 67.40 ± 10.30 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 6d11543..d5495a2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2034.18 ± 7.80 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.20 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1033.75 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.84 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log index 21750b0..45c07a2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2265.97 ± 12.20 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.23 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2275.52 ± 10.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.45 ± 10.56 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log index 6382308..aa94541 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1858.93 ± 11.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.18 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1168.34 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.08 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log index ad23019..0bbe924 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2104.47 ± 6.90 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.16 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2114.70 ± 2.89 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.07 ± 12.18 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log index cf36853..ccee19b 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2045.87 ± 7.65 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.18 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1227.13 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.33 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index c23a128..a0b24fe 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1193.42 ± 154.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 82.87 ± 1.37 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1229.75 ± 236.47 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 75.94 ± 2.23 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log index 38633bd..b0d1ee5 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1417.85 ± 229.30 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.91 ± 1.98 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 145.82 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 64.34 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index fd50648..eab1c8f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1150.84 ± 174.29 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.89 ± 0.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1116.46 ± 204.92 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 78.27 ± 2.29 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log index e37eefd..4e79c73 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1256.94 ± 209.42 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 92.19 ± 0.42 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 646.29 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 45.91 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log index ed3709f..03aa189 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 700.13 ± 3.54 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.79 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 669.82 ± 4.74 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.40 ± 0.01 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log rename to benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 4bc3cf3..b5827e0 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 687.78 ± 5.65 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 325.08 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.40 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..9ec933d --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 656.31 ± 30.59 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.35 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..ac7ca98 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.34 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.27 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log new file mode 100644 index 0000000..6bb24dd --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 667.33 ± 4.16 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.43 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..140ae37 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 262.90 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.21 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log deleted file mode 100644 index ed29934..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 667.15 ± 5.65 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.77 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log index 3496a41..6c5e115 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 786.49 ± 4.02 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.16 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 778.24 ± 5.71 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.19 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..c7414f8 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 301.71 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.25 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log index 6e64f96..ed939fb 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 783.50 ± 5.44 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.12 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 783.56 ± 11.04 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.18 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..456bcc6 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 276.51 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.58 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log deleted file mode 100644 index e08e51b..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.45 ± 2.11 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 32.90 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log index 6b8906f..cc0886f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 790.90 ± 4.05 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.98 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 779.12 ± 1.84 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 36.55 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..343c160 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.09 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.90 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log index 9889db6..68f6644 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 792.00 ± 9.77 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.14 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 774.77 ± 13.26 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.20 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7189d80 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 454.32 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.39 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..4e8eea8 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 769.93 ± 0.42 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.36 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..35d9468 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.72 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.26 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..5cdbd11 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 771.12 ± 3.66 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.39 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9dbd7d0 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.41 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.75 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log new file mode 100644 index 0000000..e205824 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 757.60 ± 0.73 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.06 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..6ad4e1b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 270.78 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.89 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..c6b089a --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 763.20 ± 10.81 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.04 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b01e2f2 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.58 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.90 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log index 9eab1bb..199e45f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.55 ± 4.47 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.07 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 775.86 ± 2.82 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.40 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm7_rc.log rename to benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log index e1cd695..1e40ed9 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 666.29 ± 5.04 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 225.21 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.26 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log index 3454955..e8d3b53 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 777.48 ± 7.78 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.14 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 776.83 ± 4.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.34 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log rename to benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index d6914c0..50ebc2a 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 702.07 ± 4.76 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.78 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.16 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 13.23 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log index 18b3f18..6407b06 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x6990260) reason :GPU Hang -✖ ! [rocm7_rc] gpt-oss-120b-F16 __fa1 failed (exit 134) +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 750.63 ± 5.94 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.04 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log rename to benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log index f06c14e..dcb3220 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 660.37 ± 3.05 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 281.34 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.87 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log index e3e2103..ee52230 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 760.87 ± 22.70 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.07 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 756.08 ± 9.81 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.06 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log index 6d6b988..baf0f15 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 698.86 ± 6.39 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.00 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.76 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log index bf8a3fa..3c5b3a7 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 719.39 ± 2.63 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.71 ± 0.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 720.94 ± 1.15 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 35.76 ± 0.02 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log rename to benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log index 6027dce..ad679ba 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 627.11 ± 1.45 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 35.32 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.02 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log index bc7473a..1977efa 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 481.71 ± 2.11 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.46 ± 0.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 513.71 ± 2.70 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.86 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-120b-F16__vulkan_radv.log rename to benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log index 41e503c..010091f 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 433.14 ± 1.74 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.99 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 157.10 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.67 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log index a3f8159..e396f32 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 703.72 ± 4.21 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.05 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 673.11 ± 6.92 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 52.11 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index d7b71ca..971cbac 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 660.34 ± 48.62 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.72 ± 0.39 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 331.82 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.00 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..4a5e683 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 665.60 ± 9.61 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.94 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..1d374b5 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.65 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.73 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log new file mode 100644 index 0000000..90b7ef8 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 652.18 ± 8.35 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 52.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..8dfe9e9 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.16 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.85 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log deleted file mode 100644 index 750266d..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.32 ± 7.07 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.09 ± 0.01 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 8561f76..7d32aad 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 767.28 ± 2.81 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.63 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 767.82 ± 6.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.71 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..ff08d39 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 301.90 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.75 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 78d5aa9..a21151f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 773.91 ± 4.34 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.61 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 782.34 ± 9.39 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.76 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..85d2d56 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 293.43 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.44 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log deleted file mode 100644 index 251f8d5..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 644.73 ± 4.21 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.15 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index 48364b4..f529ee6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 766.09 ± 8.12 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.51 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 762.33 ± 0.82 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.67 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..3c1ffc5 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 341.35 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.61 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 45683fa..7b25ca7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.39 ± 3.58 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.70 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 778.37 ± 3.31 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.63 ± 0.01 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d7ed569 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 358.69 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.51 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..5cfe906 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 755.98 ± 7.49 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.78 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..44f2fde --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.50 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.71 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..824e9db --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 766.72 ± 15.10 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.72 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1b571bc --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 225.80 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.74 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log new file mode 100644 index 0000000..4090438 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 742.07 ± 2.29 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.23 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..65eff65 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 263.78 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.36 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..fe72ff6 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 763.92 ± 4.22 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.34 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a0a4c62 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 367.14 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.34 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 6fe114d..b87dc76 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.58 ± 4.67 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.62 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 753.49 ± 1.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.76 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log index 140d013..4f67e15 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.20 ± 4.73 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.07 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.23 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.45 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 224167a..3f9ccaa 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 772.03 ± 9.61 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.64 ± 0.04 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 774.40 ± 6.21 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.74 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 4a49ba5..bf10e38 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 698.26 ± 2.01 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.05 ± 0.12 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.01 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.73 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log index 4bae853..eb7d365 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 755.62 ± 4.68 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.70 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 746.02 ± 1.26 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.20 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log index 8222179..b80557c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 651.94 ± 3.45 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 274.48 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.72 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log index 5a90e19..bb466e7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 773.20 ± 7.58 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.65 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 752.08 ± 9.39 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.38 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log index 303ed9d..0959919 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 700.53 ± 1.99 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.03 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.23 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 5bf0d19..66d71b9 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 788.46 ± 4.36 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.32 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 759.84 ± 2.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.66 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index f988571..08a9873 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 681.25 ± 3.69 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 51.65 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 169.89 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 32.63 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 7182fbe..6a55bcb 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 526.13 ± 3.20 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.90 ± 0.05 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 534.51 ± 0.52 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 54.67 ± 0.10 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log index a3c335c..e6a3286 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 464.26 ± 2.62 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 52.85 ± 0.16 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 159.72 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.31 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log index 94200e9..2a24d43 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1274.89 ± 11.66 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1430.02 ± 3.53 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.38 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log rename to benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index c3de17a..e8f169c 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1238.64 ± 11.98 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.26 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 551.38 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.35 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..81766fe --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1494.10 ± 6.72 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.26 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..b3e8085 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 357.87 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.88 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log new file mode 100644 index 0000000..1e7e8ae --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1430.88 ± 12.04 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.37 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..991dcb5 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 419.15 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.30 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log deleted file mode 100644 index 6df4ffa..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1230.17 ± 12.16 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.22 ± 0.00 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log index 09ce36a..dfc5846 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1493.11 ± 16.19 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.30 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1500.80 ± 17.51 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.24 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..a0525bf --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 479.16 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.87 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log index d7ccdc5..6fbbc9a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1460.62 ± 17.09 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.32 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1440.00 ± 14.14 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.28 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d615a03 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 478.62 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.86 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log deleted file mode 100644 index 1c7b7d5..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1163.88 ± 56.10 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 25.78 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log index 705807c..2be1bd7 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.43 ± 11.78 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.36 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1449.04 ± 10.94 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.86 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..90b82e9 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.30 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.38 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log index 2885396..afe2728 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.62 ± 19.37 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.36 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1444.02 ± 15.40 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.40 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..36b8745 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 612.85 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.39 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..149dafc --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1499.09 ± 21.84 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.40 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..a7f08c1 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 350.48 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..fb29283 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1448.72 ± 23.17 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.37 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..95a1696 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 352.46 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.92 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log new file mode 100644 index 0000000..061048d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1437.87 ± 9.12 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.25 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..fb8cd0f --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 443.64 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.05 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..df945d7 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1405.44 ± 19.51 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.27 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..658f7c8 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 454.92 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.10 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log index 2965353..0af68bb 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1470.86 ± 14.39 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.29 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1486.29 ± 11.42 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.39 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm7_rc.log rename to benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log index f3a8e8e..e91a628 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1228.62 ± 4.47 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 352.25 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.92 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log index 54fdc4a..3e93b8a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1421.53 ± 7.06 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.32 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1426.84 ± 4.41 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.36 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log rename to benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 72962bc..1a33f1d 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1263.37 ± 8.50 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 351.69 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log index 6bafac8..2500f7d 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.83 ± 17.46 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.29 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1424.62 ± 5.37 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.27 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log rename to benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log index beec68f..5a63e90 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.88 ± 18.41 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 446.44 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.05 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log index f574c02..4fd877d 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1426.10 ± 25.91 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1380.59 ± 26.70 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.29 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log index aeb83b4..0fa0af0 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1252.31 ± 14.38 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 465.50 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.06 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log index f7a30a9..c7b493d 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 609.37 ± 2.58 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 18.25 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 574.29 ± 4.39 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log rename to benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log index ec1d4a0..2a70ab4 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 566.88 ± 3.31 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 18.39 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 221.72 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.61 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log index 60f2dbb..7c46f4c 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 451.11 ± 2.96 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 16.83 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 448.90 ± 3.43 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 16.15 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-20b-F32__vulkan_radv.log rename to benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log index 9f8373e..3d43f7d 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 423.31 ± 2.25 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 16.82 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 243.39 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.76 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log index edbd049..7d852e7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1375.72 ± 12.99 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.16 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1333.81 ± 9.84 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.64 ± 0.01 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index fa5f5cf..64d7e12 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1329.52 ± 7.97 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.20 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 537.64 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.44 ± 0.00 | -build: 4807e8f9 (6609) +build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..1780e19 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1384.08 ± 13.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.16 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..0b1381a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.88 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.88 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log new file mode 100644 index 0000000..a2bda58 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1326.80 ± 18.91 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.50 ± 0.01 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..d8cbd0b --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 398.06 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.13 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log deleted file mode 100644 index 39c5bd8..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1247.40 ± 5.38 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.13 ± 0.02 | - -build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log index 0898740..69d55ba 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1529.26 ± 3.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.05 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1510.54 ± 2.64 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.95 ± 0.00 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..d55b6c5 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 474.83 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.83 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log index 9628548..ebd62ee 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1585.34 ± 7.29 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.08 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1598.15 ± 4.85 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.02 ± 0.03 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bbce369 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 477.40 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.86 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log deleted file mode 100644 index 5d3ee78..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.54 ± 7.38 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.22 ± 0.00 | - -build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index ff4cb9d..554365c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1534.52 ± 6.57 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.16 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1510.09 ± 10.37 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.09 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..0245ac8 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 518.94 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index fdd5113..43e084b 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1601.11 ± 22.80 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.21 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1572.54 ± 11.37 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.96 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..730bd47 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 554.20 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.02 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..454f08b --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1511.00 ± 19.49 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.04 ± 0.03 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..e68f1b7 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.98 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.89 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c2b5d5e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1559.27 ± 17.65 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.16 ± 0.03 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1c3d423 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.08 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.83 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log new file mode 100644 index 0000000..c25742d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1476.67 ± 4.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.53 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..2d5b65d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 418.15 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.47 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..5940589 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1526.60 ± 21.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.45 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..7545939 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 431.87 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.67 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log index cdc2616..5841cfd 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.28 ± 15.62 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.98 ± 0.03 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1507.89 ± 10.23 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.07 ± 0.04 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log index e7e4a3c..f94ac8b 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1239.41 ± 5.44 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.10 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.40 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.82 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log index bd12b56..3467d3e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1563.47 ± 11.28 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.91 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1558.14 ± 14.95 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.06 ± 0.02 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 59c8dd1..c996efd 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1368.12 ± 12.11 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.09 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.21 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.87 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log index f709e2d..3e50229 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.59 ± 7.75 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.92 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1463.05 ± 15.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.34 ± 0.03 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log index d629bad..e36709a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1239.97 ± 8.69 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.10 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 422.57 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.49 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log index 0cc8bcd..7033622 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1566.75 ± 13.55 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.99 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1513.62 ± 5.84 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.47 ± 0.02 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log index 005bec1..ad72e1c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1381.33 ± 11.13 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.13 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 435.15 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.40 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index ca541fd..a7d392a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1908.57 ± 17.12 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.91 ± 0.04 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1681.86 ± 231.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.38 ± 0.03 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log rename to benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log index 089ce35..c67d585 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1515.08 ± 10.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.59 ± 0.07 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 300.31 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 46.98 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index f3b5d7c..f6ceacb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1340.77 ± 10.85 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.19 ± 0.11 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1354.58 ± 9.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 77.10 ± 0.22 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log rename to benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log index ca19264..4a9b8da 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1097.23 ± 7.32 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.95 ± 0.40 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 298.10 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 52.75 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log index 5919eae..4ba377e 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 985.32 ± 1.37 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.21 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1091.87 ± 1.16 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.02 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log index 4258f59..6ae1d5c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 910.75 ± 2.97 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.19 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 54.85 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | -build: 11f0af55 (6736) +build: 31df4608 (7038) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log index 7649f89..5dd9b43 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 980.87 ± 2.46 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.86 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1117.58 ± 1.52 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.47 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log index e0c43a5..733f960 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 896.70 ± 0.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.87 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.68 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log new file mode 100644 index 0000000..9f57dab --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1119.14 ± 0.89 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.51 ± 0.02 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log new file mode 100644 index 0000000..809b014 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.07 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log index 6ca70dd..b702a5c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1101.41 ± 1.79 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.92 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1113.73 ± 1.45 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.09 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..9ad214d --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.04 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log index fcfcec4..db329af 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1009.78 ± 2.26 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.91 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1014.02 ± 2.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.96 ± 0.01 | -build: 11f0af55 (6736) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fc72518 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.34 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 1c195f9..6f9bc79 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1113.80 ± 2.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.40 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1116.12 ± 3.27 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.11 ± 0.02 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..7ee8eea --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 186.52 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index 4e33479..2279b99 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1017.14 ± 1.96 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.40 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1021.92 ± 1.12 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.07 ± 0.01 | -build: 4807e8f9 (6609) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..45771d9 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | + +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..afbf55d --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1100.61 ± 4.23 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.08 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log new file mode 100644 index 0000000..a11d43b --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.66 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f5034a4 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1007.54 ± 4.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.04 ± 0.01 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..59dc74e --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.57 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log new file mode 100644 index 0000000..5474b5f --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1107.98 ± 1.17 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.05 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log new file mode 100644 index 0000000..b7f4180 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.20 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..81a14b9 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1015.07 ± 2.17 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.00 ± 0.02 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2d8527e --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.55 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | + +build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log index ddbc53b..23e50c3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1092.96 ± 3.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.81 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1100.81 ± 1.25 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.00 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log index cdea446..c05fd49 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 990.88 ± 3.03 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.20 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 69.72 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log index 49195bc..336c571 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 956.75 ± 3.28 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.98 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 963.09 ± 2.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.02 ± 0.01 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log index 43f68cc..4eafa4f 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 875.30 ± 2.50 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 71.68 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | -build: a3cb0474 (6735) +build: bca95ca51 (7036) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log index 078fa87..6ab6bf8 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1107.83 ± 1.63 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.98 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1113.49 ± 3.85 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.02 ± 0.02 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log index 35c66af..d1c28ea 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 987.11 ± 2.95 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 177.96 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log index 1fda496..a879978 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 966.53 ± 1.48 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.81 ± 0.01 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 968.65 ± 2.86 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.01 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log similarity index 56% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log index 44169fc..ebaa5b3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 873.58 ± 1.82 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.15 ± 0.02 | +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.34 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index cf2840a..63adc8b 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1376.09 ± 0.77 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.23 ± 0.06 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1378.42 ± 1.37 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.41 ± 0.06 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log similarity index 60% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log rename to benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log index 19d7053..b66aa79 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1325.29 ± 2.18 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 53.69 ± 0.09 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 101.79 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.25 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index 6572ff7..cd66204 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1096.08 ± 2.80 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.86 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1094.33 ± 3.06 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.65 ± 0.01 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log similarity index 60% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log rename to benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log index 9031f93..4578395 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1005.90 ± 1.71 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 54.61 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 174.60 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.70 ± 0.00 | -build: a3cb0474 (6735) +build: 1c398dc9e (7034) diff --git a/docs/assets/index2.css b/docs/assets/index2.css new file mode 100644 index 0000000..b483a53 --- /dev/null +++ b/docs/assets/index2.css @@ -0,0 +1,503 @@ +:root { + --bg: #f5f6fa; + --ink: #101828; + --muted: #6b7080; + --accent: #155eef; + --border: #d8dce6; + --card: #ffffff; + --chip-bg: #e6ecff; + --chip-active-bg: #155eef; + --chip-active-ink: #fff; + --winner-bg: #d7f5e3; + --winner-ink: #025333; + --warn: #c2410c; + --model-col: 180px; + --winner-col: 120px; +} + +* { + box-sizing: border-box; +} + +body { + margin: 0; + font: 13px/1.35 "Inter", "Segoe UI", system-ui, -apple-system, sans-serif; + background: var(--bg); + color: var(--ink); +} + +header { + padding: 14px 20px 4px; + background: var(--card); + border-bottom: 1px solid var(--border); +} + +header h1 { + margin: 0 0 4px; + font-size: 20px; + font-weight: 600; +} + +header p { + margin: 2px 0; + font-size: 12px; + color: var(--muted); +} + +.controls, +.panel { + background: var(--card); + border-bottom: 1px solid var(--border); + padding: 10px 20px; +} + +.controls { + display: flex; + gap: 12px; + flex-wrap: wrap; + align-items: flex-start; +} + +.control { + min-width: 200px; +} + +.control.grow { + flex: 1 1 320px; +} + +.slider-block { + min-width: 260px; +} + +label { + display: block; + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--muted); + margin-bottom: 3px; +} + +input[type="text"], +select { + width: 100%; + padding: 6px 9px; + border-radius: 6px; + border: 1px solid var(--border); + font-size: 13px; + background: #fff; +} + +.chip-row { + display: flex; + flex-wrap: wrap; + gap: 4px; +} + +.chip { + border: none; + border-radius: 999px; + padding: 3px 10px; + font-size: 12px; + cursor: pointer; + background: var(--chip-bg); + color: var(--ink); +} + +.chip.active { + background: var(--chip-active-bg); + color: var(--chip-active-ink); +} + +.chip.small { + font-size: 11px; + padding: 3px 8px; +} + +.panel.compact { + padding: 8px 20px; +} + +.panel-split { + display: flex; + gap: 16px; + flex-wrap: wrap; + align-items: center; +} + +.backend-list { + display: flex; + flex-wrap: wrap; + gap: 6px 14px; +} + +.backend-label { + display: flex; + align-items: center; + gap: 8px; +} + +.backend-actions { + display: flex; + gap: 6px; +} + +.backend-item { + display: inline-flex; + align-items: center; + gap: 6px; + font-size: 12px; + color: var(--ink); +} + +.backend-item input { + transform: translateY(1px); +} + +.backend-item .tag { + font-size: 10px; + padding: 0 6px; + border-radius: 999px; + background: #eef2ff; + color: #1d3ea5; + text-transform: uppercase; + transform: translateY(-2px); +} + +.stats-box { + margin-left: auto; + display: flex; + gap: 10px; + align-items: center; + font-size: 12px; + color: var(--muted); +} + +#tables { + display: grid; + gap: 14px; +} + +.test-block h2 { + margin: 0 0 4px; + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.06em; + color: var(--muted); +} + + +.table-wrap { + border-radius: 8px; + border: 1px solid var(--border); + background: var(--card); + position: relative; + width: 100%; + max-width: 100%; + overflow: hidden; +} + +.table-scroll { + overflow-x: auto; + overflow-y: hidden; + width: 100%; + position: relative; + scrollbar-gutter: stable both-edges; + display: block; +} + +.table-scroll table { + min-width: 100%; +} + +table { + border-collapse: collapse; + font-size: 11.5px; + width: max-content; + min-width: 100%; + table-layout: fixed; +} + +thead { + background: #f4f6fb; +} + +th, +td { + padding: 4px 6px; + border-bottom: 1px solid var(--border); + white-space: normal; + border-right: 1px solid var(--border); + overflow-wrap: anywhere; +} + +th { + position: relative; + font-weight: 600; +} + +th.sticky, +td.sticky { + position: sticky; + left: 0; + background: inherit; + z-index: 3; + box-shadow: 1px 0 0 var(--border); +} + +th.model, +td.model { + width: var(--model-col); + position: sticky; + left: 0; + z-index: 3; + background: #f8f9ff; +} + +th.winner, +td.winner { + width: var(--winner-col); + position: sticky; + left: var(--model-col); + z-index: 3; + background: #f1f5ff; +} + +td.model { + min-width: 170px; + font-weight: 500; +} + +.data-cell { + white-space: normal; + position: relative; +} + +.data-cell[data-env]:hover::after { + content: attr(data-env); + position: absolute; + top: 50%; + transform: translateY(-50%); + left: 50%; + transform: translate(-50%, -120%); + background: rgba(16, 24, 40, 0.92); + color: #fff; + padding: 4px 8px; + border-radius: 6px; + font-size: 11px; + white-space: nowrap; + pointer-events: none; + z-index: 5; +} + +.data-cell[data-env]:hover::before { + content: ""; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -30%); + border: 6px solid transparent; + border-top-color: rgba(16, 24, 40, 0.92); + pointer-events: none; + z-index: 5; +} + +.data-cell .measure, +.data-cell .std { + white-space: nowrap; +} + +.row-actions { + display: flex; + gap: 6px; + margin-top: 4px; + flex-wrap: wrap; +} + +.row-action-btn { + border: none; + background: transparent; + color: var(--accent); + font-size: 11px; + padding: 0; + cursor: pointer; + text-decoration: underline; + text-underline-offset: 2px; +} + +.row-action-btn:hover { + color: #0d3fb8; +} + +td.model .meta { + font-size: 10px; + color: var(--muted); +} + +tbody tr:nth-child(even) td { + background: #fafbff; +} + +.measure { + font-feature-settings: "tnum"; + font-size: 12px; + font-weight: 600; +} + +.std { + color: var(--muted); + font-size: 10px; +} + +.winner-list { + display: flex; + flex-wrap: wrap; + gap: 2px; +} + +.winner-pill { + display: inline-flex; + align-items: center; + padding: 2px 6px; + border-radius: 999px; + font-size: 10px; + background: #dbeafe; + color: #1e3a8a; + margin: 1px; + white-space: nowrap; +} + +.cell-error { + color: var(--warn); +} + +.cell-empty { + color: #c3c7d1; +} + +.best { + background: var(--winner-bg) !important; + color: var(--winner-ink); +} + +td.best .measure, +td.best .std { + color: var(--winner-ink); +} + +.resize-handle { + position: absolute; + top: 0; + right: 0; + width: 6px; + height: 100%; + cursor: col-resize; +} + +.resize-handle::after { + content: ""; + position: absolute; + inset: 0; + background: transparent; +} + +th.backend-header { + cursor: grab; + white-space: nowrap; +} + +th.backend-header.dragging { + opacity: 0.5; +} + +th.backend-header.drop-target { + outline: 2px dashed var(--accent); +} + +.resize-line { + width: 2px; + background: var(--accent); + pointer-events: none; +} + +.resize-overlay { + position: absolute; + top: 0; + bottom: 0; + left: 0; + right: 0; + pointer-events: none; +} + +.resize-bar { + position: absolute; + top: 0; + bottom: 0; + width: 6px; + cursor: col-resize; + pointer-events: auto; + background: transparent; +} + +.tag { + display: inline-flex; + align-items: center; + padding: 0 6px; + border-radius: 999px; + background: #f1f5ff; + color: #1d4ed8; + font-size: 11px; +} +.range-wrap { + position: relative; + height: 32px; +} + +.range-wrap input[type="range"] { + position: absolute; + inset: 0; + width: 100%; + background: transparent; + -webkit-appearance: none; + appearance: none; + pointer-events: none; +} + +.range-wrap input[type="range"]::-webkit-slider-thumb { + pointer-events: auto; + -webkit-appearance: none; + width: 18px; + height: 18px; + border-radius: 50%; + background: var(--accent); + border: 2px solid #fff; + box-shadow: 0 0 3px rgba(0, 0, 0, 0.3); +} + +.range-wrap input[type="range"]::-moz-range-thumb { + pointer-events: auto; + width: 18px; + height: 18px; + border-radius: 50%; + background: var(--accent); + border: 2px solid #fff; +} + +.range-track { + position: absolute; + top: 50%; + left: 0; + right: 0; + height: 6px; + border-radius: 999px; + background: #e3e7f1; + transform: translateY(-50%); + pointer-events: none; +} + +.range-values { + font-size: 11px; + color: var(--muted); + margin-top: 4px; +} diff --git a/docs/assets/index2.js b/docs/assets/index2.js new file mode 100644 index 0000000..3837fb3 --- /dev/null +++ b/docs/assets/index2.js @@ -0,0 +1,687 @@ +const DEFAULT_CTX = "default"; +const K_SIGMA = 1.0; +const MIN_TOL = 0.25; +const MODEL_COL_WIDTH = 180; +const WINNER_COL_WIDTH = 120; + +const state = { + contexts: [], + contextMap: new Map(), + envs: [], + backendOrder: [], + columnWidths: {}, + filters: { + search: "", + quant: "", + context: DEFAULT_CTX, + backends: new Set(), + sizeLo: null, + sizeHi: null, + }, + ui: {}, + sizeStats: { min: Infinity, max: -Infinity }, + draggingEnv: null, +}; + +document.addEventListener("DOMContentLoaded", async () => { + cacheUI(); + try { + const res = await fetch("results.json"); + const data = await res.json(); + prepareData(data?.runs || []); + initializeControls(); + renderTables(); + } catch (err) { + console.error("Failed to load results.json", err); + state.ui.stats.textContent = "Failed to load results.json"; + } +}); + +function cacheUI() { + state.ui = { + search: document.getElementById("filter-search"), + quant: document.getElementById("filter-quant"), + contextChips: document.getElementById("context-chips"), + backendList: document.getElementById("backend-list"), + backendAll: document.getElementById("backend-all"), + backendNone: document.getElementById("backend-none"), + sizeLo: document.getElementById("sizeLo"), + sizeHi: document.getElementById("sizeHi"), + sizeTrack: document.getElementById("sizeTrack"), + sizeLoVal: document.getElementById("sizeLoVal"), + sizeHiVal: document.getElementById("sizeHiVal"), + stats: document.getElementById("stats-line"), + resetBtn: document.getElementById("reset-layout"), + tables: document.getElementById("tables"), + }; +} + +function prepareData(runs) { + const contextMap = new Map(); + const envSet = new Set(); + const quantSet = new Set(); + + for (const run of runs) { + const test = normalizeTest(run.test); + if (!test || !run.env) continue; + const contextKey = run.context || DEFAULT_CTX; + const env = run.env; + envSet.add(env); + if (run.quant) quantSet.add(run.quant.toUpperCase()); + + const ctx = ensureContext(contextMap, contextKey, run.context_tokens); + const testEntry = ensureTest(ctx, test.original); + + const modelName = run.model_clean || run.model; + const row = ensureModel(testEntry, modelName, run); + row.backends[env] = { + mean: typeof run.tps_mean === "number" ? run.tps_mean : null, + std: typeof run.tps_std === "number" ? run.tps_std : null, + error: Boolean(run.error), + error_type: run.error_type || null, + }; + } + + state.contextMap = contextMap; + state.contexts = [...contextMap.values()].sort((a, b) => { + if (a.key === DEFAULT_CTX) return -1; + if (b.key === DEFAULT_CTX) return 1; + if (a.tokens && b.tokens) return a.tokens - b.tokens; + if (a.tokens) return -1; + if (b.tokens) return 1; + return a.key.localeCompare(b.key); + }); + state.envs = [...envSet].sort(); + state.backendOrder = [...state.envs]; + state.columnWidths = Object.fromEntries(state.envs.map((env) => [env, 120])); + state.quantOptions = [...quantSet].sort(); + state.filters.context = state.contexts[0]?.key || DEFAULT_CTX; + state.filters.backends = new Set(state.envs); +} + +function ensureContext(map, key, tokens) { + if (!map.has(key)) { + map.set(key, { + key, + label: formatContextLabel(key, tokens), + tokens: tokens ?? null, + tests: new Map(), + }); + } else if (tokens && !map.get(key).tokens) { + const ctx = map.get(key); + ctx.tokens = tokens; + ctx.label = formatContextLabel(key, tokens); + } + return map.get(key); +} + +function ensureTest(ctx, testName) { + if (!ctx.tests.has(testName)) { + ctx.tests.set(testName, { + name: testName, + models: new Map(), + }); + } + return ctx.tests.get(testName); +} + +function ensureModel(testEntry, modelName, run) { + if (!testEntry.models.has(modelName)) { + testEntry.models.set(modelName, { + model: modelName, + quant: (run.quant || "Unknown").toUpperCase(), + sizeB: run.name_params_b ?? run.params_b ?? null, + backends: {}, + search_blob: [modelName, run.quant, run.env, run.test] + .filter(Boolean) + .map((s) => s.toString().toLowerCase()) + .join(" "), + }); + } + const row = testEntry.models.get(modelName); + const sizeCandidate = run.name_params_b ?? run.params_b; + if (row.sizeB == null && typeof sizeCandidate === "number") { + row.sizeB = sizeCandidate; + } + if (typeof row.sizeB === "number") { + state.sizeStats.min = Math.min(state.sizeStats.min, row.sizeB); + state.sizeStats.max = Math.max(state.sizeStats.max, row.sizeB); + } + return row; +} + +function initializeControls() { + const { quant, contextChips, backendList, search, resetBtn, sizeLo, sizeHi } = state.ui; + + quant.innerHTML = ""; + const anyOpt = document.createElement("option"); + anyOpt.value = ""; + anyOpt.textContent = "Any"; + quant.appendChild(anyOpt); + state.quantOptions.forEach((q) => { + const opt = document.createElement("option"); + opt.value = q; + opt.textContent = q; + quant.appendChild(opt); + }); + + contextChips.innerHTML = ""; + state.contexts.forEach((ctx) => { + const btn = document.createElement("button"); + btn.type = "button"; + btn.className = "chip" + (ctx.key === state.filters.context ? " active" : ""); + btn.dataset.context = ctx.key; + btn.textContent = ctx.label; + contextChips.appendChild(btn); + }); + + renderBackendList(); + setupSizeSlider(); + + search.addEventListener("input", (e) => { + state.filters.search = (e.target.value || "").trim().toLowerCase(); + renderTables(); + }); + + quant.addEventListener("change", (e) => { + state.filters.quant = e.target.value; + renderTables(); + }); + + contextChips.addEventListener("click", (e) => { + const btn = e.target.closest("button[data-context]"); + if (!btn) return; + state.filters.context = btn.dataset.context; + [...contextChips.querySelectorAll("button")].forEach((b) => b.classList.toggle("active", b === btn)); + renderTables(); + }); + + backendList.addEventListener("change", (e) => { + const checkbox = e.target.closest("input[data-env]"); + if (!checkbox) return; + const env = checkbox.dataset.env; + if (checkbox.checked) { + state.filters.backends.add(env); + } else { + state.filters.backends.delete(env); + } + renderTables(); + }); + + state.ui.backendAll.addEventListener("click", () => { + state.filters.backends = new Set(state.envs); + renderBackendList(); + renderTables(); + }); + + state.ui.backendNone.addEventListener("click", () => { + state.filters.backends = new Set(); + renderBackendList(); + renderTables(); + }); + + sizeLo.addEventListener("input", () => updateSizeUI(true)); + sizeHi.addEventListener("input", () => updateSizeUI(true)); + + resetBtn.addEventListener("click", () => { + state.filters.search = ""; + state.filters.quant = ""; + state.filters.context = state.contexts[0]?.key || DEFAULT_CTX; + state.filters.backends = new Set(state.envs); + search.value = ""; + quant.value = ""; + [...contextChips.querySelectorAll("button")].forEach((btn) => + btn.classList.toggle("active", btn.dataset.context === state.filters.context) + ); + renderBackendList(); + setupSizeSlider(); + renderTables(); + }); +} + +function renderBackendList() { + const container = state.ui.backendList; + container.innerHTML = ""; + state.backendOrder.forEach((env) => { + const label = document.createElement("label"); + label.className = "backend-item"; + const checkbox = document.createElement("input"); + checkbox.type = "checkbox"; + checkbox.dataset.env = env; + checkbox.checked = state.filters.backends.has(env); + label.appendChild(checkbox); + + const baseSpan = document.createElement("span"); + const { base, tags } = splitEnvName(env); + baseSpan.textContent = base; + label.appendChild(baseSpan); + tags.forEach((tag) => { + const pill = document.createElement("span"); + pill.className = "tag"; + pill.textContent = tag; + label.appendChild(pill); + }); + + container.appendChild(label); + }); +} + +function setupSizeSlider() { + const { sizeLo, sizeHi } = state.ui; + const minRaw = state.sizeStats.min === Infinity ? 0 : Math.floor(state.sizeStats.min || 0); + const maxRaw = state.sizeStats.max === -Infinity ? 0 : Math.ceil(state.sizeStats.max || 0); + const minB = Math.max(0, minRaw); + const maxB = Math.max(minB, maxRaw); + + [sizeLo, sizeHi].forEach((inp) => { + inp.min = minB; + inp.max = maxB; + inp.step = 1; + }); + + sizeLo.value = minB; + sizeHi.value = maxB; + sizeLo.style.zIndex = 2; + sizeHi.style.zIndex = 1; + updateSizeUI(false); +} + +function updateSizeUI(triggerRender) { + const { sizeLo, sizeHi, sizeLoVal, sizeHiVal, sizeTrack } = state.ui; + if (+sizeLo.value > +sizeHi.value) { + if (document.activeElement === sizeLo) { + sizeHi.value = sizeLo.value; + } else { + sizeLo.value = sizeHi.value; + } + } + sizeLo.style.zIndex = +sizeLo.value >= +sizeHi.max - 1 ? 4 : 2; + sizeHi.style.zIndex = +sizeHi.value <= +sizeLo.min + 1 ? 3 : 1; + state.filters.sizeLo = +sizeLo.value; + state.filters.sizeHi = +sizeHi.value; + sizeLoVal.textContent = formatSizeLabel(state.filters.sizeLo); + sizeHiVal.textContent = formatSizeLabel(state.filters.sizeHi); + const range = (sizeHi.max - sizeLo.min) || 1; + const minB = +sizeLo.min; + const start = ((state.filters.sizeLo - minB) / range) * 100; + const end = ((state.filters.sizeHi - minB) / range) * 100; + sizeTrack.style.background = `linear-gradient(to right, #e3e7f1 ${start}%, var(--accent) ${start}%, var(--accent) ${end}%, #e3e7f1 ${end}%)`; + if (triggerRender) renderTables(); +} + +function renderTables() { + const ctx = state.contextMap.get(state.filters.context); + if (!ctx) { + state.ui.tables.innerHTML = "

No data for this context.

"; + state.ui.stats.textContent = "0 rows"; + return; + } + + const backendList = state.backendOrder.filter((env) => state.filters.backends.has(env)); + const tests = [...ctx.tests.values()].sort((a, b) => a.name.localeCompare(b.name)); + const frag = document.createDocumentFragment(); + let totalRows = 0; + + for (const test of tests) { + const models = filterModels(test.models); + if (!models.length) continue; + totalRows += models.length; + const block = document.createElement("div"); + block.className = "test-block"; + const heading = document.createElement("h2"); + heading.textContent = `${test.name.toUpperCase()} — tokens/second`; + block.appendChild(heading); + + const tableWrap = document.createElement("div"); + tableWrap.className = "table-wrap"; + const scroller = document.createElement("div"); + scroller.className = "table-scroll"; + + const modelsWithWinners = models.map((model) => { + const winners = computeWinners(model, backendList); + return { ...model, _cachedWinners: winners }; + }); + + const table = buildSingleTable(modelsWithWinners, backendList); + scroller.appendChild(table); + tableWrap.appendChild(scroller); + block.appendChild(tableWrap); + setupResizeOverlay(scroller, backendList, table); + frag.appendChild(block); + } + + state.ui.tables.innerHTML = ""; + if (frag.childNodes.length) { + state.ui.tables.appendChild(frag); + } else { + state.ui.tables.innerHTML = "

No models match the current filters.

"; + } + state.ui.stats.textContent = `Showing ${totalRows.toLocaleString()} model rows across ${backendList.length} backends`; +} + +function buildSingleTable(models, backendList) { + const table = document.createElement("table"); + const colgroup = document.createElement("colgroup"); + const colModel = document.createElement("col"); + colModel.style.width = `${MODEL_COL_WIDTH}px`; + colgroup.appendChild(colModel); + const colWinner = document.createElement("col"); + colWinner.style.width = `${WINNER_COL_WIDTH}px`; + colgroup.appendChild(colWinner); + backendList.forEach((env) => { + const col = document.createElement("col"); + col.style.width = `${state.columnWidths[env] || 120}px`; + col.dataset.env = env; + colgroup.appendChild(col); + }); + table.appendChild(colgroup); + + const thead = document.createElement("thead"); + const headRow = document.createElement("tr"); + headRow.appendChild(makeHeaderCell("Model", "model")); + headRow.appendChild(makeHeaderCell("Winner", "winner")); + backendList.forEach((env) => { + const th = makeHeaderCell(env, "backend-header"); + attachHeaderInteractions(th, env); + headRow.appendChild(th); + }); + thead.appendChild(headRow); + table.appendChild(thead); + + const tbody = document.createElement("tbody"); + models.forEach((model) => { + const tr = document.createElement("tr"); + const tdModel = document.createElement("td"); + tdModel.className = "model"; + tdModel.innerHTML = `
${model.model}
${model.quant} · ${formatSize(model.sizeB)}
`; + + const actionWrap = document.createElement("div"); + actionWrap.className = "row-actions"; + const btnDesc = document.createElement("button"); + btnDesc.type = "button"; + btnDesc.className = "row-action-btn"; + btnDesc.textContent = "Sort ↓"; + btnDesc.addEventListener("click", (e) => { + e.preventDefault(); + sortBackendsByModel(model, "desc"); + }); + const btnAsc = document.createElement("button"); + btnAsc.type = "button"; + btnAsc.className = "row-action-btn"; + btnAsc.textContent = "Sort ↑"; + btnAsc.addEventListener("click", (e) => { + e.preventDefault(); + sortBackendsByModel(model, "asc"); + }); + actionWrap.appendChild(btnDesc); + actionWrap.appendChild(btnAsc); + tdModel.appendChild(actionWrap); + tr.appendChild(tdModel); + + const tdWinner = document.createElement("td"); + tdWinner.className = "winner"; + if (model._cachedWinners.length) { + const wrap = document.createElement("div"); + wrap.className = "winner-list"; + wrap.innerHTML = model._cachedWinners.map((w) => `${w}`).join(""); + tdWinner.appendChild(wrap); + } else { + tdWinner.innerHTML = ``; + } + + tr.appendChild(tdWinner); + + backendList.forEach((env) => { + const td = document.createElement("td"); + td.className = "data-cell"; + td.dataset.env = env; + const cell = model.backends[env]; + if (!cell) { + td.innerHTML = ``; + } else if (cell.error || cell.mean == null) { + td.innerHTML = `⚠ ${cell.error_type || "error"}`; + } else { + const isBest = model._cachedWinners.includes(env); + if (isBest) td.classList.add("best"); + td.innerHTML = `
${cell.mean.toFixed(2)}
± ${cell.std?.toFixed(2) ?? "—"}
`; + } + tr.appendChild(td); + }); + tbody.appendChild(tr); + }); + table.appendChild(tbody); + return table; +} + +function makeHeaderCell(label, extra = "") { + const th = document.createElement("th"); + th.textContent = label; + if (extra) th.className = extra; + return th; +} + +function attachHeaderInteractions(th, env) { + const width = state.columnWidths[env] || 120; + th.style.width = `${width}px`; + th.style.minWidth = `${width}px`; + th.draggable = true; + th.addEventListener("dragstart", (e) => { + state.draggingEnv = env; + th.classList.add("dragging"); + e.dataTransfer.effectAllowed = "move"; + }); + th.addEventListener("dragend", () => { + state.draggingEnv = null; + th.classList.remove("dragging"); + document.querySelectorAll("th.backend-header.drop-target").forEach((el) => el.classList.remove("drop-target")); + }); + th.addEventListener("dragover", (e) => { + if (!state.draggingEnv || state.draggingEnv === env) return; + e.preventDefault(); + th.classList.add("drop-target"); + }); + th.addEventListener("dragleave", () => th.classList.remove("drop-target")); + th.addEventListener("drop", (e) => { + if (!state.draggingEnv || state.draggingEnv === env) return; + e.preventDefault(); + moveBackend(state.draggingEnv, env); + th.classList.remove("drop-target"); + }); + + const handle = document.createElement("span"); + handle.className = "resize-handle"; + handle.addEventListener("mousedown", (e) => startResize(e, env)); + th.appendChild(handle); +} + +function moveBackend(from, to) { + const order = state.backendOrder; + const fromIdx = order.indexOf(from); + const toIdx = order.indexOf(to); + if (fromIdx === -1 || toIdx === -1) return; + const [col] = order.splice(fromIdx, 1); + order.splice(toIdx, 0, col); + renderBackendList(); + renderTables(); +} + +function filterModels(modelsMap) { + const models = []; + for (const model of modelsMap.values()) { + if (state.filters.search && !model.search_blob.includes(state.filters.search)) continue; + if (state.filters.quant && model.quant !== state.filters.quant) continue; + if (model.sizeB != null) { + if (state.filters.sizeLo != null && model.sizeB < state.filters.sizeLo - 1e-6) continue; + if (state.filters.sizeHi != null && model.sizeB > state.filters.sizeHi + 1e-6) continue; + } + models.push(model); + } + models.sort((a, b) => a.model.localeCompare(b.model)); + return models; +} + +function computeWinners(model, backends) { + const values = []; + backends.forEach((env) => { + const entry = model.backends[env]; + if (entry && !entry.error && typeof entry.mean === "number") { + values.push({ + env, + mean: entry.mean, + std: typeof entry.std === "number" ? entry.std : 0, + }); + } + }); + if (!values.length) return []; + let best = values[0]; + for (const v of values) if (v.mean > best.mean) best = v; + const winners = []; + for (const v of values) { + const pooled = Math.sqrt((best.std || 0) ** 2 + (v.std || 0) ** 2); + const tol = Math.max(MIN_TOL, K_SIGMA * pooled); + if ((best.mean - v.mean) <= tol) winners.push(v.env); + } + return winners; +} + +function normalizeTest(name) { + if (!name) return null; + return { key: name.toLowerCase(), original: name }; +} + +function formatContextLabel(key, tokens) { + if (key === DEFAULT_CTX) return "Default window"; + if (tokens) return `ctx ${tokens.toLocaleString()}`; + return key; +} + +function formatSize(size) { + if (size == null) return "—"; + return `${Number(size).toFixed(1)}B`; +} + +function formatSizeLabel(size) { + if (size >= 1000) return `${(size / 1000).toFixed(1)}kB`; + return `${Math.round(size)}B`; +} + +function sortBackendsByModel(model, direction) { + const dir = direction === "asc" ? 1 : -1; + const order = [...state.backendOrder].sort((a, b) => { + const va = backendValue(model.backends[a], direction); + const vb = backendValue(model.backends[b], direction); + if (va === vb) return a.localeCompare(b); + return (va - vb) * dir; + }); + state.backendOrder = order; + renderBackendList(); + renderTables(); +} + +function backendValue(entry, direction) { + if (!entry || entry.error || typeof entry.mean !== "number") { + return direction === "asc" ? Number.POSITIVE_INFINITY : Number.NEGATIVE_INFINITY; + } + return entry.mean; +} + +function splitEnvName(env) { + const parts = env.split(/-(?=rocwmma|improved|hblt0)/g); + if (parts.length === 1) return { base: env, tags: [] }; + const base = parts[0]; + const tags = env + .slice(base.length) + .split("-") + .filter(Boolean) + .map((t) => t.toUpperCase()); + return { base, tags }; +} + +function startResize(event, env) { + event.preventDefault(); + event.stopPropagation(); + const column = state.columnWidths[env] || 120; + const startX = event.clientX; + const shellRect = state.ui.tables.getBoundingClientRect(); + const guide = document.createElement("div"); + guide.className = "resize-line"; + guide.style.position = "fixed"; + guide.style.top = `${shellRect.top}px`; + guide.style.bottom = `${window.innerHeight - shellRect.bottom}px`; + guide.style.left = `${startX}px`; + guide.style.width = "2px"; + guide.style.background = "var(--accent)"; + guide.style.zIndex = "10"; + document.body.appendChild(guide); + let nextWidth = column; + + const onMove = (e) => { + const delta = e.clientX - startX; + nextWidth = Math.max(80, column + delta); + guide.style.left = `${e.clientX}px`; + }; + + const onUp = () => { + document.removeEventListener("mousemove", onMove); + document.removeEventListener("mouseup", onUp); + guide.remove(); + state.columnWidths[env] = nextWidth; + renderTables(); + }; + + document.addEventListener("mousemove", onMove); + document.addEventListener("mouseup", onUp); +} + +function setupResizeOverlay(tableWrap, backendList, table) { + let overlay = tableWrap.querySelector(".resize-overlay"); + if (!overlay) { + overlay = document.createElement("div"); + overlay.className = "resize-overlay"; + tableWrap.appendChild(overlay); + } else { + overlay.innerHTML = ""; + } + + overlay.style.width = `${tableWrap.clientWidth}px`; + overlay.style.height = `${table.offsetHeight}px`; + + const bars = []; + let offset = MODEL_COL_WIDTH + WINNER_COL_WIDTH; + backendList.forEach((env) => { + const width = state.columnWidths[env] || 120; + const bar = document.createElement("div"); + bar.className = "resize-bar"; + bar.dataset.env = env; + bar.addEventListener("mousedown", (e) => startResize(e, env)); + overlay.appendChild(bar); + bars.push({ bar, offset, width, env }); + offset += width; + }); + + const positionBars = () => { + bars.forEach(({ bar, offset, width }) => { + const left = offset + width - 3 - tableWrap.scrollLeft; + bar.style.left = `${left}px`; + }); + }; + positionBars(); + + if (tableWrap._overlayScroll) { + tableWrap.removeEventListener("scroll", tableWrap._overlayScroll); + } + const onScroll = () => positionBars(); + tableWrap.addEventListener("scroll", onScroll); + tableWrap._overlayScroll = onScroll; + + if (tableWrap._overlayResize) { + tableWrap._overlayResize.disconnect(); + } + const resizeObserver = new ResizeObserver(() => { + overlay.style.width = `${tableWrap.clientWidth}px`; + overlay.style.height = `${table.offsetHeight}px`; + positionBars(); + }); + resizeObserver.observe(tableWrap); + tableWrap._overlayResize = resizeObserver; +} diff --git a/docs/index.html b/docs/index.html index 010ca08..a022c27 100644 --- a/docs/index.html +++ b/docs/index.html @@ -2,780 +2,73 @@ - - - AMD Ryzen AI MAX+ 395 "Strix Halo" — Llama.cpp Backend Performance Comparison - - + + + AMD Strix Halo — Backend Benchmarks (Grid View) +
-

AMD Ryzen AI MAX+ 395 "Strix Halo" — Llama.cpp Backend Performance Comparison

-

- Compare model throughput across backends (pp512 & tg128). - Repo: kyuz0/amd-strix-halo-toolboxes -

-

Platform: Framework Desktop, 128GB Unified RAM (accelerator-performance tuned profile)

-

Loading meta…

+

AMD Ryzen AI MAX+ 395 “Strix Halo” — Benchmark Grid

+

Framework Desktop · AMD Ryzen AI MAX 395+ · 128GB unified RAM

+

Fedora 42 · Linux 6.18.0-0.rc5.243.vanilla.fc42.x86_64 · llama.cpp build 1c398dc9e (7034)

+

Benchmarks captured 14 Nov 2025 · Repo: kyuz0/amd-strix-halo-toolboxes

- -
-
- - +
+
+ +
-
- -
-
- - -
+
+ +
+
+
+ +
- -
- 4B50B96B143B189B235B -
-
- 4B235B + 0B0B
- -
- - -
-
- -
-
Winner = every selected backend within the best’s uncertainty range, combining ± errors from both - results.
- -
-

Prompt Processing (pp512) — tokens/second

-
-
-
-
-
-
- - - - - -
-
-
-
-

Text Generation (tg128) — tokens/second

-
-
-
-
+
+
+
+
+ +
+ + +
+
+
+
+
+
Loading…
+ +
-
- - - - - -
-
-
- +
+
+
+ - \ No newline at end of file + diff --git a/docs/results.json b/docs/results.json index 9c024a8..392bd1a 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,26 +1,37 @@ { "meta": { - "generated_at": "2025-10-20T19:05:18Z", + "generated_at": "2025-11-15T08:24:40Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" }, { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" }, { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" + }, + { + "hash": "ee8dd5c65", + "number": "7035" } ], "environments": [ + "rocm-7alpha", + "rocm-7alpha-rocwmma", + "rocm-7alpha-rocwmma-improved", "rocm6_4_4", "rocm6_4_4-hblt0", "rocm6_4_4-rocwmma", "rocm6_4_4-rocwmma-hblt0", + "rocm7.1", + "rocm7.1-hblt0", + "rocm7.1-rocwmma", + "rocm7.1-rocwmma-hblt0", "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", @@ -34,13 +45,15 @@ { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 121.16, - "tps_std": 0.25, + "tps_mean": 103.27, + "tps_std": 0.47, "error": false, "error_type": null, "backend": "ROCm", @@ -50,21 +63,23 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 20.46, + "tps_mean": 22.61, "tps_std": 0.0, "error": false, "error_type": null, @@ -75,22 +90,204 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 19.09, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 134.2, - "tps_std": 0.28, + "tps_mean": 87.62, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 13.99, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 93.53, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 135.1, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "ROCm", @@ -102,8 +299,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -113,9 +310,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 21.04, - "tps_std": 0.01, + "tps_mean": 21.72, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -127,8 +326,59 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 12.41, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 142.39, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -137,9 +387,62 @@ "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.7, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 13.3, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 135.87, + "tps_mean": 135.5, "tps_std": 0.06, "error": false, "error_type": null, @@ -150,160 +453,10 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.46, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 171.53, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 21.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 126.3, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 19.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 136.0, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -313,8 +466,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 20.93, + "tps_mean": 21.19, "tps_std": 0.0, "error": false, "error_type": null, @@ -327,20 +482,22 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 160.81, - "tps_std": 0.78, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 30.21, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -350,22 +507,24 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.41, - "tps_std": 0.01, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.28, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -375,10 +534,10 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -388,9 +547,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 132.96, - "tps_std": 0.49, + "tps_mean": 172.61, + "tps_std": 0.32, "error": false, "error_type": null, "backend": "ROCm", @@ -402,8 +563,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -413,8 +574,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 20.99, + "tps_mean": 21.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -427,47 +590,24 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 102.61, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 20.54, + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.91, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -475,22 +615,21 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 67.08, - "tps_std": 0.15, + "tps_mean": 77.55, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "ROCm", @@ -500,21 +639,23 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 21.07, + "tps_mean": 21.7, "tps_std": 0.0, "error": false, "error_type": null, @@ -525,49 +666,26 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 117.71, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.53, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.62, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -575,22 +693,21 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 148.21, - "tps_std": 0.25, + "tps_mean": 146.5, + "tps_std": 0.48, "error": false, "error_type": null, "backend": "ROCm", @@ -600,21 +717,23 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 21.1, + "tps_mean": 21.72, "tps_std": 0.0, "error": false, "error_type": null, @@ -625,49 +744,26 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 119.33, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.19, + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.23, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "runtime", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -675,21 +771,236 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1", + "env_base": "rocm7.1", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 93.03, + "tps_mean": 99.42, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 172.08, + "tps_std": 0.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 32.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 72.38, "tps_std": 0.12, "error": false, "error_type": null, @@ -700,10 +1011,166 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.4, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 131.85, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.71, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.15, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 99.25, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -713,9 +1180,11 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 21.0, - "tps_std": 0.0, + "tps_mean": 21.54, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -727,59 +1196,33 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 136.81, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -788,9 +1231,11 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 148.95, - "tps_std": 0.73, + "tps_mean": 169.93, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", @@ -802,8 +1247,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -813,8 +1258,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 21.04, + "tps_mean": 21.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -827,58 +1274,62 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 216.84, - "tps_std": 0.52, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.65, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.15, - "tps_std": 0.01, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.82, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -888,9 +1339,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 218.68, - "tps_std": 0.54, + "tps_mean": 217.91, + "tps_std": 0.48, "error": false, "error_type": null, "backend": "Vulkan", @@ -902,8 +1355,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -913,9 +1366,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 24.27, - "tps_std": 0.01, + "tps_mean": 24.5, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -927,20 +1382,22 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 223.39, - "tps_std": 1.25, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.82, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -950,22 +1407,24 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.06, - "tps_std": 0.03, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.79, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -975,10 +1434,10 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -988,9 +1447,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 236.02, - "tps_std": 2.6, + "tps_mean": 235.07, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "Vulkan", @@ -1002,8 +1463,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1013,8 +1474,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 24.51, + "tps_mean": 24.84, "tps_std": 0.01, "error": false, "error_type": null, @@ -1027,20 +1490,76 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 118.41, - "tps_std": 0.2, + "tps_mean": 92.38, + "tps_std": 0.37, "error": false, "error_type": null, "backend": "ROCm", @@ -1050,22 +1569,24 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 15.75, - "tps_std": 0.16, + "tps_mean": 16.64, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1075,10 +1596,250 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 19.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 86.5, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 14.06, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 74.73, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 26.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1088,9 +1849,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 128.51, - "tps_std": 0.51, + "tps_mean": 125.43, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -1102,8 +1865,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1113,9 +1876,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.13, - "tps_std": 0.16, + "tps_mean": 16.48, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -1127,20 +1892,46 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": null + }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 133.48, - "tps_std": 0.45, + "tps_mean": 140.41, + "tps_std": 0.79, "error": false, "error_type": null, "backend": "ROCm", @@ -1150,10 +1941,10 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1162,9 +1953,170 @@ "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 15.77, + "tps_mean": 16.52, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 13.2, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 130.63, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 26.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 145.79, "tps_std": 0.11, "error": false, "error_type": null, @@ -1175,46 +2127,23 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 143.55, - "tps_std": 0.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.17, + "tps_mean": 16.57, "tps_std": 0.06, "error": false, "error_type": null, @@ -1225,49 +2154,26 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 121.76, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.69, + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.65, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -1275,22 +2181,21 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 129.77, - "tps_std": 0.12, + "tps_mean": 69.31, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -1300,24 +2205,53 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.14, + "tps_mean": 16.5, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.07, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -1325,71 +2259,125 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 140.68, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.84, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 134.52, + "tps_mean": 136.65, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.46, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.05, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 94.32, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.25, "tps_std": 0.53, "error": false, "error_type": null, @@ -1400,47 +2388,48 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", "env_variant": "hblt0", "fa": true, - "test": "tg128", - "tps_mean": 16.08, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 97.09, - "tps_std": 0.15, + "tps_mean": 130.72, + "tps_std": 0.76, "error": false, "error_type": null, "backend": "ROCm", @@ -1450,10 +2439,88 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.54, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 114.56, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" } }, { @@ -1462,9 +2529,11 @@ "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 15.89, + "tps_mean": 16.58, "tps_std": 0.0, "error": false, "error_type": null, @@ -1475,10 +2544,10 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -1488,9 +2557,11 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 69.91, - "tps_std": 0.44, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.46, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1500,10 +2571,10 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -1513,9 +2584,11 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": "tg128", - "tps_mean": 16.13, - "tps_std": 0.11, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.65, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -1525,60 +2598,10 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 128.74, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.77, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -1588,9 +2611,11 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 133.32, - "tps_std": 0.82, + "tps_mean": 159.14, + "tps_std": 0.64, "error": false, "error_type": null, "backend": "ROCm", @@ -1602,8 +2627,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -1613,8 +2638,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.1, + "tps_mean": 16.44, "tps_std": 0.2, "error": false, "error_type": null, @@ -1627,47 +2654,24 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 91.95, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.8, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.46, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -1675,11 +2679,8 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1688,9 +2689,11 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 125.81, - "tps_std": 0.29, + "tps_mean": 96.45, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -1702,8 +2705,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1713,8 +2716,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.2, + "tps_mean": 16.51, "tps_std": 0.0, "error": false, "error_type": null, @@ -1727,59 +2732,33 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 142.12, - "tps_std": 0.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", @@ -1788,9 +2767,11 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 139.6, - "tps_std": 0.48, + "tps_mean": 130.86, + "tps_std": 0.36, "error": false, "error_type": null, "backend": "ROCm", @@ -1802,8 +2783,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -1813,533 +2794,277 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.1, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 260.51, - "tps_std": 1.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 262.18, - "tps_std": 1.19, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 222.31, - "tps_std": 0.71, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.43, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 233.21, - "tps_std": 6.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 98.0, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 2.77, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 103.65, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 98.82, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 102.51, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 97.85, + "tps_mean": 16.53, "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.76, + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 36.62, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "hang", "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 261.54, + "tps_std": 1.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 23.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 102.47, + "tps_mean": 233.87, "tps_std": 0.08, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 2.79, + "tps_mean": 17.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.31, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 98.12, + "tps_mean": 103.85, "tps_std": 0.1, "error": false, "error_type": null, @@ -2350,69 +3075,21 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 104.23, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 2.79, "tps_std": 0.0, @@ -2425,46 +3102,23 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 92.92, - "tps_std": 8.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 19.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -2475,96 +3129,23 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 103.05, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 93.96, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, "tps_std": 0.0, "error": false, "error_type": null, @@ -2575,71 +3156,23 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 97.51, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 95.55, + "tps_mean": 103.65, "tps_std": 0.07, "error": false, "error_type": null, @@ -2650,19 +3183,393 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 12.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 0.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 105.64, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 33.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 102.56, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 11.74, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 103.23, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 11.69, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 105.28, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 2.77, "tps_std": 0.0, @@ -2675,22 +3582,48 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 104.25, - "tps_std": 0.16, + "tps_mean": 104.32, + "tps_std": 0.29, "error": false, "error_type": null, "backend": "ROCm", @@ -2700,19 +3633,21 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 2.78, "tps_std": 0.0, @@ -2725,46 +3660,23 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 93.72, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.35, "tps_std": 0.0, "error": false, "error_type": null, @@ -2775,10 +3687,763 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 102.8, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 15.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 102.92, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 15.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 103.28, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 104.3, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 35.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 103.21, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 15.05, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 96.88, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 15.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 1.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 104.95, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 32.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -2788,9 +4453,11 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 97.17, - "tps_std": 0.16, + "tps_mean": 97.99, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "ROCm", @@ -2802,8 +4469,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -2813,6 +4480,8 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 2.79, "tps_std": 0.0, @@ -2827,59 +4496,33 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 97.22, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": null }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -2888,9 +4531,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 98.46, - "tps_std": 0.54, + "tps_mean": 98.55, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "Vulkan", @@ -2902,8 +4547,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -2913,6 +4558,8 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 2.8, "tps_std": 0.0, @@ -2927,44 +4574,21 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 83.82, - "tps_std": 1.56, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.78, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -2975,10 +4599,37 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -2988,9 +4639,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 86.06, - "tps_std": 1.83, + "tps_mean": 86.56, + "tps_std": 0.57, "error": false, "error_type": null, "backend": "Vulkan", @@ -3002,8 +4655,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -3013,8 +4666,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 2.78, + "tps_mean": 2.77, "tps_std": 0.0, "error": false, "error_type": null, @@ -3027,120 +4682,76 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 264.81, - "tps_std": 10.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.68, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "test": "pp512", - "tps_mean": 282.95, - "tps_std": 5.18, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.4, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "test": "tg128", - "tps_mean": 14.77, - "tps_std": 0.06, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.36, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 278.22, - "tps_std": 1.12, + "tps_mean": 263.94, + "tps_std": 2.74, "error": false, "error_type": null, "backend": "ROCm", @@ -3150,21 +4761,101 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.67, + "tps_mean": 15.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 155.11, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 273.53, + "tps_std": 2.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.17, "tps_std": 0.03, "error": false, "error_type": null, @@ -3175,10 +4866,250 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 273.71, + "tps_std": 1.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 188.24, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 289.25, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 102.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -3188,55 +5119,37 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 292.12, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 269.91, - "tps_std": 1.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.5, + "tps_mean": 15.16, "tps_std": 0.0, "error": false, "error_type": null, @@ -3247,218 +5160,182 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 288.79, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.79, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 276.43, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.69, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 99.11, + "tps_std": 0.0, "error": true, "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 290.6, + "tps_std": 0.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 201.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 292.38, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 270.82, - "tps_std": 1.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.66, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 288.92, - "tps_std": 3.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.81, + "tps_mean": 15.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -3469,119 +5346,21 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 275.26, - "tps_std": 1.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.66, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, - "test": "pp512", - "tps_mean": 285.69, - "tps_std": 1.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "context": "longctx32768", + "context_tokens": 32768, "test": null, "tps_mean": null, "tps_std": null, @@ -3594,9 +5373,567 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": null }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 293.23, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 128.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 287.19, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.25, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.62, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 294.05, + "tps_std": 2.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.17, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 201.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 292.59, + "tps_std": 1.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 126.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 287.84, + "tps_std": 2.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 127.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -3604,8 +5941,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 291.9, + "tps_mean": 292.02, "tps_std": 1.98, "error": false, "error_type": null, @@ -3618,8 +5957,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -3629,9 +5968,11 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.78, - "tps_std": 0.02, + "tps_mean": 15.13, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3643,20 +5984,22 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 271.87, - "tps_std": 1.0, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 207.12, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3666,22 +6009,24 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.69, - "tps_std": 0.01, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.64, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3691,10 +6036,10 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -3704,271 +6049,617 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 282.7, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 202.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 224.36, + "tps_std": 2.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 84.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 211.78, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 85.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 272.42, + "tps_std": 2.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 149.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 273.57, + "tps_std": 2.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 108.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 274.27, + "tps_std": 3.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 190.45, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 224.42, - "tps_std": 3.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.99, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 224.57, - "tps_std": 3.64, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 206.64, - "tps_std": 2.56, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 212.38, - "tps_std": 2.39, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 275.0, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 11.89, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -3976,9 +6667,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 293.68, - "tps_std": 3.72, + "tps_mean": 296.39, + "tps_std": 0.35, "error": false, "error_type": null, "backend": "ROCm", @@ -3990,8 +6683,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4001,158 +6694,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 11.96, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 260.53, - "tps_std": 23.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.82, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 295.09, - "tps_std": 2.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 276.63, - "tps_std": 1.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.87, + "tps_mean": 12.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -4163,12 +6708,114 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.8, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 295.81, + "tps_std": 2.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 99.06, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -4176,9 +6823,11 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 299.51, - "tps_std": 2.06, + "tps_mean": 295.53, + "tps_std": 3.47, "error": false, "error_type": null, "backend": "ROCm", @@ -4190,8 +6839,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4201,8 +6850,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 11.99, + "tps_mean": 12.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -4215,44 +6866,21 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 283.0, - "tps_std": 2.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.9, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 208.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -4263,10 +6891,37 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4276,9 +6931,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 293.35, - "tps_std": 11.21, + "tps_mean": 298.3, + "tps_std": 1.55, "error": false, "error_type": null, "backend": "ROCm", @@ -4290,8 +6947,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4301,9 +6958,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 11.94, - "tps_std": 0.13, + "tps_mean": 12.15, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -4315,372 +6974,24 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 272.39, - "tps_std": 2.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 11.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 296.04, - "tps_std": 2.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 11.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 266.07, - "tps_std": 22.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 257.0, - "tps_std": 4.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.76, - "tps_std": 0.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 263.12, - "tps_std": 18.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 284.19, - "tps_std": 24.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 11.89, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 273.01, - "tps_std": 1.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.85, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "longctx32768", + "context_tokens": 32768, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -4688,57 +6999,811 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": null }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 346.53, - "tps_std": 1.71, + "tps_mean": 295.26, + "tps_std": 1.05, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 12.57, - "tps_std": 0.01, + "tps_mean": 12.16, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 124.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 292.62, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 124.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 296.33, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 296.32, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 202.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 291.43, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 127.05, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 287.94, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 127.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 268.04, + "tps_std": 46.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 287.0, + "tps_std": 2.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 202.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4748,9 +7813,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 346.93, - "tps_std": 1.5, + "tps_mean": 349.58, + "tps_std": 2.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -4762,8 +7829,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4773,6 +7840,8 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 12.44, "tps_std": 0.0, @@ -4787,20 +7856,22 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 272.53, - "tps_std": 1.82, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 99.9, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -4810,22 +7881,24 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 12.58, - "tps_std": 0.01, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.2, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -4835,10 +7908,10 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4848,9 +7921,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 280.38, - "tps_std": 1.48, + "tps_mean": 280.28, + "tps_std": 1.95, "error": false, "error_type": null, "backend": "Vulkan", @@ -4862,8 +7937,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -4873,8 +7948,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 12.58, + "tps_mean": 12.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -4887,20 +7964,76 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 106.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 283.37, - "tps_std": 1.53, + "tps_mean": 313.68, + "tps_std": 2.67, "error": false, "error_type": null, "backend": "ROCm", @@ -4910,21 +8043,23 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 17.81, + "tps_mean": 19.49, "tps_std": 0.0, "error": false, "error_type": null, @@ -4935,46 +8070,23 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 305.77, - "tps_std": 1.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.97, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 154.48, "tps_std": 0.0, "error": false, "error_type": null, @@ -4985,19 +8097,210 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 311.89, + "tps_std": 2.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 314.61, + "tps_std": 2.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, "test": null, "tps_mean": null, "tps_std": null, @@ -5010,9 +8313,117 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", "build": null }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 308.87, + "tps_std": 1.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 101.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", @@ -5020,9 +8431,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 306.78, - "tps_std": 2.0, + "tps_mean": 306.69, + "tps_std": 2.02, "error": false, "error_type": null, "backend": "ROCm", @@ -5034,8 +8447,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -5045,9 +8458,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 17.96, - "tps_std": 0.0, + "tps_mean": 18.58, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -5059,44 +8474,21 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 285.54, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.59, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.29, "tps_std": 0.0, "error": false, "error_type": null, @@ -5107,10 +8499,37 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -5120,9 +8539,11 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 310.82, - "tps_std": 2.23, + "tps_mean": 308.5, + "tps_std": 4.59, "error": false, "error_type": null, "backend": "ROCm", @@ -5134,8 +8555,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -5145,8 +8566,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 17.96, + "tps_mean": 18.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -5159,20 +8582,22 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 295.23, - "tps_std": 0.7, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 209.52, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5182,22 +8607,24 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.81, - "tps_std": 0.01, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.95, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5207,10 +8634,10 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -5220,9 +8647,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 312.09, - "tps_std": 1.64, + "tps_mean": 308.24, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -5234,8 +8663,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -5245,8 +8674,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 17.97, + "tps_mean": 18.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -5259,370 +8690,22 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 281.11, - "tps_std": 2.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 305.36, - "tps_std": 1.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 284.75, - "tps_std": 2.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 298.01, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 281.91, - "tps_std": 2.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 306.77, - "tps_std": 2.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 284.71, - "tps_std": 1.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "test": "pp512", - "tps_mean": 299.68, - "tps_std": 1.75, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 217.84, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -5632,21 +8715,23 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "test": "tg128", - "tps_mean": 17.93, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.97, "tps_std": 0.0, "error": false, "error_type": null, @@ -5657,422 +8742,1074 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 193.74, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 21.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 194.33, - "tps_std": 1.56, + "tps_mean": 304.34, + "tps_std": 2.51, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 107.77, "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 20.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 223.8, - "tps_std": 2.7, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.91, + "tps_mean": 18.61, "tps_std": 0.02, "error": false, "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 228.13, - "tps_std": 3.26, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 131.74, - "tps_std": 0.4, - "error": false, - "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.54, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 138.29, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 142.3, - "tps_std": 0.82, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.95, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", "fa": true, - "test": "tg128", - "tps_mean": 14.9, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 134.45, - "tps_std": 0.46, + "tps_mean": 305.86, + "tps_std": 2.98, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.58, + "tps_mean": 18.55, "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 141.69, - "tps_std": 0.47, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 134.32, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "tg128", - "tps_mean": 14.95, - "tps_std": 0.09, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.62, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", "env_variant": null, - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 132.25, - "tps_std": 0.49, + "tps_mean": 306.39, + "tps_std": 1.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 198.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 309.0, + "tps_std": 2.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 201.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 305.09, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 136.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 292.67, + "tps_std": 0.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.6, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 302.22, + "tps_std": 1.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 196.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 300.96, + "tps_std": 2.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 195.52, + "tps_std": 1.35, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 20.65, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 81.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 13.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 230.79, + "tps_std": 1.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 20.79, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 89.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 141.46, + "tps_std": 1.06, "error": false, "error_type": null, "backend": "ROCm", @@ -6082,21 +9819,23 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.39, + "tps_mean": 16.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -6107,22 +9846,24 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 144.16, - "tps_std": 0.77, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 48.54, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -6132,22 +9873,24 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "tg128", - "tps_mean": 14.87, - "tps_std": 0.05, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.81, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -6157,321 +9900,23 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 133.5, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.55, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 143.26, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.87, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 132.68, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.62, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 142.6, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.9, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 133.6, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.59, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 140.15, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.93, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 133.39, + "tps_mean": 143.18, "tps_std": 0.54, "error": false, "error_type": null, @@ -6482,21 +9927,23 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.62, + "tps_mean": 16.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -6507,22 +9954,48 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 28.32, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 146.88, - "tps_std": 0.69, + "tps_mean": 144.03, + "tps_std": 1.12, "error": false, "error_type": null, "backend": "ROCm", @@ -6532,19 +10005,687 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 38.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 143.65, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 24.32, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 142.82, + "tps_std": 1.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.11, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 24.0, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 146.45, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.88, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 47.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 145.3, + "tps_std": 1.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 48.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 144.51, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 27.99, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 144.56, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 27.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 146.23, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 15.0, "tps_std": 0.0, @@ -6557,46 +10698,23 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 134.05, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.64, + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 39.93, "tps_std": 0.0, "error": false, "error_type": null, @@ -6607,22 +10725,51 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 144.44, - "tps_std": 0.78, + "tps_mean": 146.0, + "tps_std": 0.62, "error": false, "error_type": null, "backend": "ROCm", @@ -6632,21 +10779,23 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.99, + "tps_mean": 14.97, "tps_std": 0.0, "error": false, "error_type": null, @@ -6657,2421 +10806,4013 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 136.12, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.32, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 135.43, - "tps_std": 4.81, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.14, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 120.72, - "tps_std": 3.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 125.48, - "tps_std": 4.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 18.02, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 436.29, - "tps_std": 4.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 25.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 480.95, - "tps_std": 4.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 25.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 346.51, - "tps_std": 4.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 356.62, - "tps_std": 6.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 25.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 435.7, - "tps_std": 6.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 485.65, - "tps_std": 7.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 25.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 343.63, - "tps_std": 2.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", "env_variant": "hblt0", "fa": true, - "test": "pp512", - "tps_mean": 364.38, - "tps_std": 4.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 25.83, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 40.14, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 435.87, - "tps_std": 4.36, + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.09, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 25.56, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 477.05, - "tps_std": 5.97, + "tps_mean": 144.65, + "tps_std": 0.59, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 25.84, + "tps_mean": 15.2, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 338.71, - "tps_std": 3.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 347.28, - "tps_std": 5.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 25.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 439.13, - "tps_std": 4.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 25.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 482.81, - "tps_std": 7.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 25.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 334.68, - "tps_std": 2.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 346.28, - "tps_std": 2.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 25.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 216.27, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 10.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 216.46, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 10.0, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 163.35, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 9.24, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 166.05, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 9.29, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 562.46, - "tps_std": 5.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 55.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 626.72, - "tps_std": 6.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 57.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 589.82, - "tps_std": 5.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 55.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 620.07, - "tps_std": 8.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 56.88, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 556.95, - "tps_std": 4.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 54.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 632.67, - "tps_std": 5.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 56.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 582.53, - "tps_std": 3.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 55.41, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 629.19, - "tps_std": 4.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 56.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 557.13, - "tps_std": 5.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 55.6, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 623.1, - "tps_std": 4.22, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 27.94, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": "tg128", - "tps_mean": 56.95, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 586.15, - "tps_std": 3.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 55.49, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 619.92, - "tps_std": 6.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 57.09, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.94, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 553.07, - "tps_std": 3.87, + "tps_mean": 142.7, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 55.49, + "tps_mean": 15.19, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 28.48, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", "env": "rocm7_rc", "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 632.51, - "tps_std": 3.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 56.97, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 588.1, - "tps_std": 4.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 55.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 631.26, - "tps_std": 5.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 56.97, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1053.02, - "tps_std": 7.31, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 63.84, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1020.41, - "tps_std": 5.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 59.42, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 823.6, - "tps_std": 3.91, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 64.74, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 857.47, - "tps_std": 4.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 63.41, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 614.24, - "tps_std": 6.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 66.41, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 662.07, - "tps_std": 2.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 68.4, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 620.19, - "tps_std": 1.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 66.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 659.42, - "tps_std": 4.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 68.39, + "tps_mean": 145.17, "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 39.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 143.77, + "tps_std": 0.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 139.19, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.45, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 128.55, + "tps_std": 1.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.47, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 478.1, + "tps_std": 4.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 197.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 483.01, + "tps_std": 4.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 147.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 482.27, + "tps_std": 5.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 164.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 479.75, + "tps_std": 5.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 107.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 360.25, + "tps_std": 7.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.84, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 107.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 594.0, + "tps_mean": 493.29, + "tps_std": 1.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 208.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 362.53, + "tps_std": 2.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 244.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 484.23, + "tps_std": 1.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 357.45, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 127.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 485.6, + "tps_std": 4.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 360.51, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 472.32, + "tps_std": 1.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 129.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 348.39, + "tps_std": 4.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 125.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 12.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 487.44, + "tps_std": 3.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 194.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 350.49, + "tps_std": 4.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 191.31, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 53.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 165.85, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 73.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 573.35, + "tps_std": 5.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 201.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 568.92, + "tps_std": 3.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 147.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 574.31, + "tps_std": 5.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 622.81, + "tps_std": 3.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 625.44, + "tps_std": 4.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 108.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 640.29, + "tps_std": 6.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 56.58, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 203.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 632.09, + "tps_std": 4.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 620.61, + "tps_std": 2.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 617.37, + "tps_std": 6.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.82, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 628.16, + "tps_std": 1.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 167.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 628.24, + "tps_std": 3.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 169.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 625.38, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.11, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 616.46, + "tps_std": 1.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 625.22, + "tps_std": 5.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 168.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 626.37, "tps_std": 6.13, "error": false, "error_type": null, @@ -9079,24 +14820,350 @@ "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 17.35, + "file_size_gib": 24.53, "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log", + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 170.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1013.46, + "tps_std": 4.96, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 62.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 69.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 853.23, + "tps_std": 3.21, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 66.93, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 104.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 29.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 673.5, + "tps_std": 8.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 66.17, + "tps_mean": 70.76, "tps_std": 0.02, "error": false, "error_type": null, @@ -9107,22 +15174,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 665.68, - "tps_std": 5.62, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.86, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9132,22 +15201,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "tg128", - "tps_mean": 68.36, - "tps_std": 0.02, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.65, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9157,22 +15228,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 610.15, - "tps_std": 6.46, + "tps_mean": 674.15, + "tps_std": 10.24, "error": false, "error_type": null, "backend": "ROCm", @@ -9182,21 +15255,23 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 66.15, + "tps_mean": 71.14, "tps_std": 0.01, "error": false, "error_type": null, @@ -9207,22 +15282,186 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 150.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 675.1, + "tps_std": 3.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 161.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 662.57, - "tps_std": 4.91, + "tps_mean": 663.26, + "tps_std": 2.04, "error": false, "error_type": null, "backend": "ROCm", @@ -9232,171 +15471,23 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 68.26, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 606.51, - "tps_std": 6.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 66.58, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 652.79, - "tps_std": 5.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.7, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 616.56, - "tps_std": 6.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 66.64, + "tps_mean": 68.79, "tps_std": 0.03, "error": false, "error_type": null, @@ -9407,22 +15498,78 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 108.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 655.09, - "tps_std": 6.42, + "tps_mean": 655.75, + "tps_std": 5.39, "error": false, "error_type": null, "backend": "ROCm", @@ -9432,22 +15579,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.71, - "tps_std": 0.09, + "tps_mean": 68.7, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -9457,22 +15606,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 606.05, - "tps_std": 4.7, + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 109.44, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9482,22 +15633,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 66.71, - "tps_std": 0.01, + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.45, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9507,22 +15660,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 662.07, - "tps_std": 3.18, + "tps_mean": 674.37, + "tps_std": 11.18, "error": false, "error_type": null, "backend": "ROCm", @@ -9532,71 +15687,23 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 616.68, - "tps_std": 3.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 66.75, + "tps_mean": 67.62, "tps_std": 0.02, "error": false, "error_type": null, @@ -9607,22 +15714,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, - "test": "pp512", - "tps_mean": 660.44, - "tps_std": 3.81, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.29, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9632,21 +15741,833 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 665.28, + "tps_std": 7.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.74, + "tps_mean": 68.57, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 649.91, + "tps_std": 5.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.03, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 658.06, + "tps_std": 8.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.11, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 662.81, + "tps_std": 8.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.77, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 167.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 660.13, + "tps_std": 8.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.73, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 169.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 650.55, + "tps_std": 3.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 647.31, + "tps_std": 2.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.01, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 131.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 660.75, + "tps_std": 2.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.92, "tps_std": 0.01, "error": false, "error_type": null, @@ -9657,60 +16578,172 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 662.51, + "tps_std": 3.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 793.9, - "tps_std": 3.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 83.96, - "tps_std": 0.15, + "tps_mean": 68.75, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 170.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -9720,9 +16753,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 773.64, - "tps_std": 3.79, + "tps_mean": 774.61, + "tps_std": 2.12, "error": false, "error_type": null, "backend": "Vulkan", @@ -9734,8 +16769,8 @@ "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -9745,9 +16780,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 77.82, - "tps_std": 0.06, + "tps_mean": 81.31, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", @@ -9759,20 +16796,22 @@ "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 800.63, - "tps_std": 2.65, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 68.3, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -9782,22 +16821,24 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 83.73, - "tps_std": 0.11, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.82, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -9807,10 +16848,10 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -9820,9 +16861,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 832.99, - "tps_std": 3.06, + "tps_mean": 832.44, + "tps_std": 3.18, "error": false, "error_type": null, "backend": "Vulkan", @@ -9834,8 +16877,8 @@ "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -9845,9 +16888,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 81.4, - "tps_std": 0.12, + "tps_mean": 87.24, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "Vulkan", @@ -9859,20 +16904,76 @@ "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 747.69, - "tps_std": 1.06, + "tps_mean": 751.05, + "tps_std": 61.73, "error": false, "error_type": null, "backend": "ROCm", @@ -9882,21 +16983,23 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.17, + "tps_mean": 14.21, "tps_std": 0.0, "error": false, "error_type": null, @@ -9907,46 +17010,23 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 809.2, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.11, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 323.48, "tps_std": 0.0, "error": false, "error_type": null, @@ -9957,22 +17037,24 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 781.56, - "tps_std": 1.57, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.58, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -9982,19 +17064,48 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 795.35, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 14.16, "tps_std": 0.0, @@ -10007,46 +17118,23 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 819.61, - "tps_std": 0.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.11, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 354.28, "tps_std": 0.0, "error": false, "error_type": null, @@ -10057,22 +17145,24 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 720.89, - "tps_std": 0.7, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.03, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -10082,19 +17172,264 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 816.42, + "tps_std": 1.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 345.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 811.49, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 151.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 819.41, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 14.15, "tps_std": 0.0, @@ -10107,10 +17442,64 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 149.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -10120,9 +17509,11 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 815.58, - "tps_std": 1.47, + "tps_mean": 826.24, + "tps_std": 1.79, "error": false, "error_type": null, "backend": "ROCm", @@ -10134,8 +17525,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -10145,8 +17536,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.08, + "tps_mean": 14.24, "tps_std": 0.0, "error": false, "error_type": null, @@ -10159,44 +17552,21 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 772.99, - "tps_std": 2.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 267.36, "tps_std": 0.0, "error": false, "error_type": null, @@ -10207,10 +17577,37 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -10220,9 +17617,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 824.58, - "tps_std": 1.69, + "tps_mean": 833.1, + "tps_std": 1.71, "error": false, "error_type": null, "backend": "ROCm", @@ -10234,8 +17633,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -10245,8 +17644,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 14.1, + "tps_mean": 14.21, "tps_std": 0.0, "error": false, "error_type": null, @@ -10259,8 +17660,494 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 252.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 797.32, + "tps_std": 10.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 180.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 810.77, + "tps_std": 1.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 819.82, + "tps_std": 2.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 269.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 832.48, + "tps_std": 2.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 266.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" } }, { @@ -10269,9 +18156,11 @@ "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", "env_variant": "rocwmma", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 734.9, + "tps_mean": 802.23, "tps_std": 0.79, "error": false, "error_type": null, @@ -10282,60 +18171,10 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 799.43, - "tps_std": 1.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -10345,156 +18184,8 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "test": "tg128", - "tps_mean": 14.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 762.49, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 788.46, - "tps_std": 1.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 734.16, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 14.16, "tps_std": 0.0, @@ -10507,46 +18198,23 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": null, + "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 816.86, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.13, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 167.45, "tps_std": 0.0, "error": false, "error_type": null, @@ -10557,10 +18225,253 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 790.38, + "tps_std": 0.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 170.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 813.63, + "tps_std": 8.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.21, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 265.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -10569,473 +18480,781 @@ "env": "rocm7_rc-hblt0", "env_base": "rocm7_rc", "env_variant": "hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 763.42, + "tps_mean": 813.23, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 252.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 590.41, + "tps_std": 71.66, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 533.84, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 13.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 219.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 439.12, + "tps_std": 31.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 112.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 428.59, + "tps_std": 48.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 93.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 409.77, + "tps_std": 60.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 192.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 469.22, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 92.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 524.21, "tps_std": 1.37, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 806.7, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 668.85, - "tps_std": 1.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 648.34, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 541.39, - "tps_std": 3.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 532.11, - "tps_std": 3.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 420.14, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 468.87, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 477.22, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 524.62, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11045,8 +19264,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 4.11, + "tps_mean": 4.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -11059,44 +19280,21 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 413.24, - "tps_std": 0.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.1, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 93.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -11107,10 +19305,37 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11120,9 +19345,11 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 471.95, - "tps_std": 1.68, + "tps_mean": 472.47, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "ROCm", @@ -11134,8 +19361,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11145,356 +19372,8 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 471.17, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 529.49, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 421.4, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 464.58, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 458.08, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 499.11, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 413.95, - "tps_std": 0.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, + "context": "default", + "context_tokens": null, "test": "tg128", "tps_mean": 4.0, "tps_std": 0.0, @@ -11507,10 +19386,820 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 182.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 530.73, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 204.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 463.62, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 113.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 522.69, + "tps_std": 0.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 115.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 434.79, + "tps_std": 46.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 179.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 524.39, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 195.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 463.45, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 111.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 499.44, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 115.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" } }, { @@ -11520,9 +20209,11 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 469.08, - "tps_std": 0.27, + "tps_mean": 470.06, + "tps_std": 0.56, "error": false, "error_type": null, "backend": "ROCm", @@ -11534,8 +20225,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11545,8 +20236,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 4.11, + "tps_mean": 4.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -11559,44 +20252,21 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 457.65, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 177.69, "tps_std": 0.0, "error": false, "error_type": null, @@ -11607,10 +20277,37 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11620,9 +20317,11 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 503.26, - "tps_std": 0.79, + "tps_mean": 501.79, + "tps_std": 0.45, "error": false, "error_type": null, "backend": "ROCm", @@ -11634,8 +20333,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11645,8 +20344,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 4.11, + "tps_mean": 4.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -11659,32 +20360,64 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 198.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "load", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log", - "build": null - }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -11692,6 +20425,8 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": null, "tps_mean": null, "tps_std": null, @@ -11710,52 +20445,26 @@ { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 103.58, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 3.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "build": null }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", @@ -11764,8 +20473,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 107.33, + "tps_mean": 106.82, "tps_std": 1.0, "error": false, "error_type": null, @@ -11778,8 +20489,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11789,8 +20500,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 3.91, + "tps_mean": 3.92, "tps_std": 0.0, "error": false, "error_type": null, @@ -11803,20 +20516,76 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 62.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1935.04, - "tps_std": 3.89, + "tps_mean": 2224.91, + "tps_std": 1.45, "error": false, "error_type": null, "backend": "ROCm", @@ -11826,21 +20595,239 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 79.17, + "tps_mean": 75.58, + "tps_std": 9.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1239.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 59.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2245.25, + "tps_std": 4.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 75.82, + "tps_std": 8.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1224.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2256.38, + "tps_std": 8.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.67, "tps_std": 0.01, "error": false, "error_type": null, @@ -11851,10 +20838,64 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1206.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 59.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11864,9 +20905,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 2278.78, - "tps_std": 8.79, + "tps_mean": 2283.48, + "tps_std": 2.94, "error": false, "error_type": null, "backend": "ROCm", @@ -11878,8 +20921,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11889,9 +20932,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 76.94, - "tps_std": 0.01, + "tps_mean": 78.74, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -11903,20 +20948,22 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2072.56, - "tps_std": 8.2, + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 898.63, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -11926,10 +20973,37 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -11938,9 +21012,146 @@ "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2154.45, + "tps_std": 10.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 79.03, + "tps_mean": 76.62, + "tps_std": 3.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 855.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2276.8, + "tps_std": 11.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 82.07, "tps_std": 0.02, "error": false, "error_type": null, @@ -11951,872 +21162,2049 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1497.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2161.24, + "tps_std": 6.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 82.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1440.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2269.02, + "tps_std": 4.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 59.93, + "tps_std": 6.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1031.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 2158.84, + "tps_mean": 2141.35, + "tps_std": 2.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 64.63, + "tps_std": 11.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1002.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2261.65, + "tps_std": 12.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 62.69, + "tps_std": 7.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1160.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2171.0, + "tps_std": 3.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 65.68, + "tps_std": 10.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1240.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2257.61, + "tps_std": 5.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.84, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1046.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2102.34, + "tps_std": 8.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 67.4, + "tps_std": 10.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1033.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2275.52, + "tps_std": 10.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.45, + "tps_std": 10.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1168.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2114.7, + "tps_std": 2.89, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.07, + "tps_std": 12.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1227.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 58.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1229.75, + "tps_std": 236.47, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 75.94, + "tps_std": 2.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 145.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 64.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1116.46, + "tps_std": 204.92, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.27, + "tps_std": 2.29, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 646.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 45.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 669.82, "tps_std": 4.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 325.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 656.31, + "tps_std": 30.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 223.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 667.33, + "tps_std": 4.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 262.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 778.24, + "tps_std": 5.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.19, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 301.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "tg128", - "tps_mean": 77.11, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1896.32, - "tps_std": 6.0, + "tps_mean": 783.56, + "tps_std": 11.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 79.32, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 2261.52, - "tps_std": 12.45, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.18, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": true, - "test": "tg128", - "tps_mean": 77.18, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 276.51, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2038.57, - "tps_std": 4.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 79.42, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 2127.98, - "tps_std": 4.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 77.17, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.58, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1858.93, - "tps_std": 11.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 79.18, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 2249.97, - "tps_std": 8.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 77.23, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2045.87, - "tps_std": 7.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 79.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2110.98, - "tps_std": 11.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 77.03, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1865.48, - "tps_std": 5.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 79.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 2265.97, - "tps_std": 12.2, + "tps_mean": 779.12, + "tps_std": 1.84, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 77.23, - "tps_std": 0.01, + "tps_mean": 36.55, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2034.18, - "tps_std": 7.8, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 335.09, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 79.2, - "tps_std": 0.01, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.9, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 2104.47, - "tps_std": 6.9, + "tps_mean": 774.77, + "tps_std": 13.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 77.16, - "tps_std": 0.02, + "tps_mean": 37.2, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1417.85, - "tps_std": 229.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 85.91, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, - "test": "pp512", - "tps_mean": 1193.42, - "tps_std": 154.0, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 454.32, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, - "test": "tg128", - "tps_mean": 82.87, - "tps_std": 1.37, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.39, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1256.94, - "tps_std": 209.42, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 92.19, + "tps_mean": 769.93, "tps_std": 0.42, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1150.84, - "tps_std": 174.29, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 85.89, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 667.15, - "tps_std": 5.65, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.36, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -12826,21 +23214,23 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 34.77, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 223.72, "tps_std": 0.0, "error": false, "error_type": null, @@ -12851,21 +23241,482 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "ee8dd5c65", + "number": "7035" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", "env_variant": "rocwmma", "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 786.49, + "tps_mean": 771.12, + "tps_std": 3.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 226.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 757.6, + "tps_std": 0.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 270.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 29.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 763.2, + "tps_std": 10.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 283.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 29.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 775.86, + "tps_std": 2.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 225.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 776.83, "tps_std": 4.02, "error": false, "error_type": null, @@ -12876,485 +23727,10 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 35.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 700.13, - "tps_std": 3.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 783.5, - "tps_std": 5.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 35.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 650.45, - "tps_std": 2.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 32.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 790.9, - "tps_std": 4.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 34.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 687.78, - "tps_std": 5.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 792.0, - "tps_std": 9.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 35.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 660.37, - "tps_std": 3.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 34.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 770.55, - "tps_std": 4.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 35.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 698.86, - "tps_std": 6.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -13364,9 +23740,11 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "pp512", - "tps_mean": 777.48, - "tps_std": 7.78, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 37.34, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13378,8 +23756,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -13389,9 +23767,11 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": "tg128", - "tps_mean": 35.14, - "tps_std": 0.01, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 223.16, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13401,22 +23781,24 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 666.29, - "tps_std": 5.04, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 13.23, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13426,35 +23808,10 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 34.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { @@ -13464,56 +23821,38 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 750.63, + "tps_std": 5.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 702.07, - "tps_std": 4.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 34.78, - "tps_std": 0.01, + "tps_mean": 37.04, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13523,10 +23862,64 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 281.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 29.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13536,9 +23929,11 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 760.87, - "tps_std": 22.7, + "tps_mean": 756.08, + "tps_std": 9.81, "error": false, "error_type": null, "backend": "ROCm", @@ -13550,8 +23945,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13561,8 +23956,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 35.07, + "tps_mean": 37.06, "tps_std": 0.01, "error": false, "error_type": null, @@ -13575,58 +23972,62 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 627.11, - "tps_std": 1.45, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 284.0, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 35.32, - "tps_std": 0.02, + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 29.76, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13636,9 +24037,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 719.39, - "tps_std": 2.63, + "tps_mean": 720.94, + "tps_std": 1.15, "error": false, "error_type": null, "backend": "Vulkan", @@ -13650,8 +24053,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13661,8 +24064,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 34.71, + "tps_mean": 35.76, "tps_std": 0.02, "error": false, "error_type": null, @@ -13675,20 +24080,22 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 433.14, - "tps_std": 1.74, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.61, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -13698,22 +24105,24 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.99, - "tps_std": 0.01, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.02, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -13723,10 +24132,10 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13736,9 +24145,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 481.71, - "tps_std": 2.11, + "tps_mean": 513.71, + "tps_std": 2.7, "error": false, "error_type": null, "backend": "Vulkan", @@ -13750,8 +24161,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -13761,9 +24172,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 34.46, - "tps_std": 0.02, + "tps_mean": 34.86, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -13775,220 +24188,76 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 653.32, - "tps_std": 7.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 47.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 767.28, - "tps_std": 2.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 47.63, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 703.72, - "tps_std": 4.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.05, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 773.91, - "tps_std": 4.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 47.61, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, - "fa": false, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 157.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 644.73, - "tps_std": 4.21, + "tps_mean": 673.11, + "tps_std": 6.92, "error": false, "error_type": null, "backend": "ROCm", @@ -13998,21 +24267,23 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 46.15, + "tps_mean": 52.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -14023,22 +24294,24 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "pp512", - "tps_mean": 766.09, - "tps_std": 8.12, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 331.82, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -14048,22 +24321,24 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", "fa": true, - "test": "tg128", - "tps_mean": 47.51, - "tps_std": 0.01, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.0, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -14073,271 +24348,23 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "31df4608", + "number": "7038" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 660.34, - "tps_std": 48.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 46.72, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 780.39, - "tps_std": 3.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 47.7, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 651.94, - "tps_std": 3.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 47.17, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 756.58, - "tps_std": 4.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 47.62, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 700.53, - "tps_std": 1.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.17, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 772.03, + "tps_mean": 665.6, "tps_std": 9.61, "error": false, "error_type": null, @@ -14348,71 +24375,23 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 47.64, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 650.2, - "tps_std": 4.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 47.07, + "tps_mean": 51.94, "tps_std": 0.01, "error": false, "error_type": null, @@ -14423,22 +24402,24 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 755.62, - "tps_std": 4.68, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 224.65, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -14448,21 +24429,185 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 652.18, + "tps_std": 8.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 47.7, + "tps_mean": 52.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 254.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 767.82, + "tps_std": 6.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.71, "tps_std": 0.01, "error": false, "error_type": null, @@ -14473,1160 +24618,3088 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 698.26, - "tps_std": 2.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.05, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 773.2, - "tps_std": 7.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 47.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 681.25, - "tps_std": 3.69, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 51.65, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 788.46, - "tps_std": 4.36, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 50.32, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 464.26, - "tps_std": 2.62, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 52.85, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 526.13, - "tps_std": 3.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 52.9, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1230.17, - "tps_std": 12.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 27.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "test": "pp512", - "tps_mean": 1493.11, - "tps_std": 16.19, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 301.9, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "test": "tg128", - "tps_mean": 27.3, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.75, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1274.89, - "tps_std": 11.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1460.62, - "tps_std": 17.09, + "tps_mean": 782.34, + "tps_std": 9.39, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 27.32, + "tps_mean": 51.76, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1163.88, - "tps_std": 56.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 25.78, + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 293.43, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1508.43, - "tps_std": 11.78, + "tps_mean": 762.33, + "tps_std": 0.82, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 27.36, + "tps_mean": 51.67, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1238.64, - "tps_std": 11.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.26, + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 341.35, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1492.62, - "tps_std": 19.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 27.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1220.88, - "tps_std": 18.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 27.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1470.86, - "tps_std": 14.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 27.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1252.31, - "tps_std": 14.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1421.53, - "tps_std": 7.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 27.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1228.62, - "tps_std": 4.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 27.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1492.83, - "tps_std": 17.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 27.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1263.37, - "tps_std": 8.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1426.1, - "tps_std": 25.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 27.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 566.88, + "tps_mean": 778.37, "tps_std": 3.31, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.63, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 358.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 755.98, + "tps_std": 7.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 226.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 766.72, + "tps_std": 15.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 225.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 742.07, + "tps_std": 2.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 263.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 38.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 763.92, + "tps_std": 4.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.34, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 367.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 38.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 753.49, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 226.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 774.4, + "tps_std": 6.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 226.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 746.02, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 274.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 752.08, + "tps_std": 9.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 284.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 38.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 759.84, + "tps_std": 2.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 18.39, + "tps_mean": 52.66, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 169.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 534.51, + "tps_std": 0.52, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 54.67, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 159.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1430.02, + "tps_std": 3.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 20.91, "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 551.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 24.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1494.1, + "tps_std": 6.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 357.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1430.88, + "tps_std": 12.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 419.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 24.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1500.8, + "tps_std": 17.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 479.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1440.0, + "tps_std": 14.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 478.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1449.04, + "tps_std": 10.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 619.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1444.02, + "tps_std": 15.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 612.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1499.09, + "tps_std": 21.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 350.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1448.72, + "tps_std": 23.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 352.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1437.87, + "tps_std": 9.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 443.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1405.44, + "tps_std": 19.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 454.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1486.29, + "tps_std": 11.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 352.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1426.84, + "tps_std": 4.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 351.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1424.62, + "tps_std": 5.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 446.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1380.59, + "tps_std": 26.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.29, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 465.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15636,9 +27709,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 609.37, - "tps_std": 2.58, + "tps_mean": 574.29, + "tps_std": 4.39, "error": false, "error_type": null, "backend": "Vulkan", @@ -15650,8 +27725,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15661,9 +27736,11 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 18.25, - "tps_std": 0.01, + "tps_mean": 17.78, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -15675,20 +27752,22 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 423.31, - "tps_std": 2.25, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 221.72, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -15698,22 +27777,24 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", + "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-F32", "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 16.82, - "tps_std": 0.02, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.61, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -15723,10 +27804,10 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv.log", + "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15736,9 +27817,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 451.11, - "tps_std": 2.96, + "tps_mean": 448.9, + "tps_std": 3.43, "error": false, "error_type": null, "backend": "Vulkan", @@ -15750,8 +27833,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15761,8 +27844,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 16.83, + "tps_mean": 16.15, "tps_std": 0.01, "error": false, "error_type": null, @@ -15775,20 +27860,76 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 243.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1247.4, - "tps_std": 5.38, + "tps_mean": 1333.81, + "tps_std": 9.84, "error": false, "error_type": null, "backend": "ROCm", @@ -15798,22 +27939,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 67.13, - "tps_std": 0.02, + "tps_mean": 73.64, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15823,10 +27966,280 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 537.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1384.08, + "tps_std": 13.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 349.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1326.8, + "tps_std": 18.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.5, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 398.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15836,9 +28249,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1529.26, - "tps_std": 3.68, + "tps_mean": 1510.54, + "tps_std": 2.64, "error": false, "error_type": null, "backend": "ROCm", @@ -15850,8 +28265,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15861,9 +28276,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.05, - "tps_std": 0.01, + "tps_mean": 72.95, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15875,20 +28292,22 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1375.72, - "tps_std": 12.99, + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 474.83, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15898,22 +28317,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.16, - "tps_std": 0.01, + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.83, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15923,10 +28344,10 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15936,9 +28357,11 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1585.34, - "tps_std": 7.29, + "tps_mean": 1598.15, + "tps_std": 4.85, "error": false, "error_type": null, "backend": "ROCm", @@ -15950,8 +28373,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "11f0af55", - "number": "6736" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -15961,308 +28384,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 68.08, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1220.54, - "tps_std": 7.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 67.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1534.52, - "tps_std": 6.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 68.16, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1329.52, - "tps_std": 7.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.2, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1601.11, - "tps_std": 22.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 68.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1239.97, - "tps_std": 8.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 67.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1506.28, - "tps_std": 15.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 67.98, + "tps_mean": 73.02, "tps_std": 0.03, "error": false, "error_type": null, @@ -16273,399 +28398,1160 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 477.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1510.09, + "tps_std": 10.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 518.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1572.54, + "tps_std": 11.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.96, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 554.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1511.0, + "tps_std": 19.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.04, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 345.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1559.27, + "tps_std": 17.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.16, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 349.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1476.67, + "tps_std": 4.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 418.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1526.6, + "tps_std": 21.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.45, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 431.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1507.89, + "tps_std": 10.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "bca95ca51", + "number": "7036" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1381.33, - "tps_std": 11.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.13, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1563.47, - "tps_std": 11.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env_variant": "rocwmma", "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 67.91, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1239.41, - "tps_std": 5.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 67.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1508.59, - "tps_std": 7.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 67.92, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1368.12, - "tps_std": 12.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1566.75, - "tps_std": 13.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 67.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1515.08, - "tps_std": 10.36, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 74.59, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1908.57, - "tps_std": 17.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 72.91, + "tps_mean": 73.07, "tps_std": 0.04, "error": false, "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 345.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1558.14, + "tps_std": 14.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.06, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 349.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1463.05, + "tps_std": 15.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.34, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 422.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1513.62, + "tps_std": 5.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.47, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 435.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1681.86, + "tps_std": 231.36, + "error": false, + "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -16675,45 +29561,22 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1097.23, - "tps_std": 7.32, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 74.95, - "tps_std": 0.4, + "tps_mean": 75.38, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -16723,10 +29586,64 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 300.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 46.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -16736,9 +29653,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1340.77, - "tps_std": 10.85, + "tps_mean": 1354.58, + "tps_std": 9.42, "error": false, "error_type": null, "backend": "Vulkan", @@ -16750,8 +29669,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -16761,9 +29680,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 75.19, - "tps_std": 0.11, + "tps_mean": 77.1, + "tps_std": 0.22, "error": false, "error_type": null, "backend": "Vulkan", @@ -16775,8 +29696,386 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 298.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1091.87, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 54.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma-improved", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-improved", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "build": { + "hash": "31df4608", + "number": "7038" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1117.58, + "tps_std": 1.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.47, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 47.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1119.14, + "tps_std": 0.89, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.51, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 167.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -16785,862 +30084,1310 @@ "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", "env_variant": "rocwmma", - "fa": false, + "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 985.32, + "tps_mean": 1113.73, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1014.02, + "tps_std": 2.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.96, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1116.12, + "tps_std": 3.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.11, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 186.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1021.92, + "tps_std": 1.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 188.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1100.61, + "tps_std": 4.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 70.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1007.54, + "tps_std": 4.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 70.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1107.98, + "tps_std": 1.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.05, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 171.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1015.07, + "tps_std": 2.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.0, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "build": { + "hash": "ee8dd5c65", + "number": "7035" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1100.81, + "tps_std": 1.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 69.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 963.09, + "tps_std": 2.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.02, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 71.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "build": { + "hash": "bca95ca51", + "number": "7036" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1113.49, + "tps_std": 3.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.02, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 177.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 968.65, + "tps_std": 2.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.01, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 173.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1378.42, "tps_std": 1.37, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.21, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1101.41, - "tps_std": 1.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 49.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 910.75, - "tps_std": 2.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.19, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1009.78, - "tps_std": 2.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "11f0af55", - "number": "6736" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 980.87, - "tps_std": 2.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 49.86, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1113.8, - "tps_std": 2.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 896.7, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.87, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1017.14, - "tps_std": 1.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 990.88, - "tps_std": 3.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 50.2, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1092.96, - "tps_std": 3.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 49.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 875.3, - "tps_std": 2.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.23, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 956.75, - "tps_std": 3.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 987.11, - "tps_std": 2.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 50.23, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1107.83, - "tps_std": 1.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 873.58, - "tps_std": 1.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 50.15, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 966.53, - "tps_std": 1.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1325.29, - "tps_std": 2.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 53.69, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", - "build": { - "hash": "a3cb0474", - "number": "6735" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1376.09, - "tps_std": 0.77, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -17650,8 +31397,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -17661,8 +31408,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 53.23, + "tps_mean": 53.41, "tps_std": 0.06, "error": false, "error_type": null, @@ -17675,20 +31424,22 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1005.9, - "tps_std": 1.71, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 101.79, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -17698,22 +31449,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 54.61, - "tps_std": 0.02, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.25, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -17723,10 +31476,10 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -17736,9 +31489,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "pp512", - "tps_mean": 1096.08, - "tps_std": 2.8, + "tps_mean": 1094.33, + "tps_std": 3.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -17750,8 +31505,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" } }, { @@ -17761,9 +31516,11 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, "test": "tg128", - "tps_mean": 53.86, - "tps_std": 0.02, + "tps_mean": 53.65, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -17775,8 +31532,62 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "a3cb0474", - "number": "6735" + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "build": { + "hash": "1c398dc9e", + "number": "7034" } } ]