From 7aa6e6dea905813ef933b534371b1425d3c90c15 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Sat, 11 Apr 2026 11:18:45 +0100 Subject: [PATCH] update benchmarks --- ...-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 24 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 23 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...ash-BF16-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...sh-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...LM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log | 0 ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 0 ...Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 0 ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...M-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...lash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 0 ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 23 + ...4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-14B-Instruct-2512-BF16__rocm-7_2__fa1.log | 0 ...2512-BF16__rocm-7_2__fa1__longctx32768.log | 0 ...struct-2512-BF16__rocm-7_2__hblt0__fa1.log | 0 ...16__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 8 + ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 8 + ...truct-2512-BF16__rocm6_4_4__hblt0__fa1.log | 0 ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...struct-2512-BF16__rocm7-nightlies__fa1.log | 8 + ...16__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2512-BF16__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 23 + ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 23 + ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 8 + ...2-BF16__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log | 0 ...-of-00003__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 0 ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log | 0 ...-of-00003__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 0 ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 0 ...of-00003__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm7-nightlies__fa1.log | 0 ...03__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 0 ...0003__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 0 ...-00003__vulkan_radv__fa1__longctx32768.log | 0 ...A3B-BF16-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 0 ...of-00002__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm7-nightlies__fa1.log | 0 ...02__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 0 ...0002__vulkan_amdvlk__fa1__longctx32768.log | 0 ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 0 ...-00002__vulkan_radv__fa1__longctx32768.log | 0 ...nstruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log | 0 ...D-Q6_K_XL__rocm-7_2__fa1__longctx32768.log | 0 ...-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log | 0 ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 0 ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 0 ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 0 ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log | 0 ...XL__rocm7-nightlies__fa1__longctx32768.log | 0 ...D-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 0 ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 0 ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 0 ...6_K_XL__vulkan_radv__fa1__longctx32768.log | 0 ...30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log | 0 ...ct-Q4_K_M__rocm-7_2__fa1__longctx32768.log | 0 ...-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log | 0 ..._M__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 8 + ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 8 + ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 0 ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 8 + ..._M__rocm7-nightlies__fa1__longctx32768.log | 8 + ...ct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 8 + ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 8 + ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log | 0 ...-of-00003__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 0 ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 0 ...of-00003__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm7-nightlies__fa1.log | 0 ...03__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 0 ...0003__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 0 ...-00003__vulkan_radv__fa1__longctx32768.log | 0 ...-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 0 ...of-00002__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm7-nightlies__fa1.log | 0 ...02__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 0 ...0002__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 0 ...-00002__vulkan_radv__fa1__longctx32768.log | 0 ...-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log | 0 ...-of-00003__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 0 ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...A3B-BF16-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...n3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log | 0 ...D-Q4_K_XL__rocm-7_2__fa1__longctx32768.log | 0 ...B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log | 0 ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log | 0 ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log | 8 + ...4_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...mma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log | 0 ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 0 ...2b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 0 ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 0 ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 0 ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 0 ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...2b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 0 ...XL__rocm7-nightlies__fa1__longctx32768.log | 0 ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 0 ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 0 ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 0 ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 0 ...-it-BF16-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 0 ...of-00002__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...6-00001-of-00002__rocm7-nightlies__fa1.log | 0 ...02__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 0 ...0002__vulkan_amdvlk__fa1__longctx32768.log | 0 ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 0 ...-00002__vulkan_radv__fa1__longctx32768.log | 0 .../gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log | 0 ...it-Q3_K_S__rocm-7_2__fa1__longctx32768.log | 0 ...a-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log | 0 ..._S__rocm-7_2__hblt0__fa1__longctx32768.log | 0 .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 0 ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 0 ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 0 ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...a-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log | 0 ..._S__rocm7-nightlies__fa1__longctx32768.log | 0 ...it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 0 ..._K_S__vulkan_amdvlk__fa1__longctx32768.log | 0 ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 0 ...Q3_K_S__vulkan_radv__fa1__longctx32768.log | 0 ...0b-mxfp4-00001-of-00003__rocm-7_2__fa1.log | 0 ...-of-00003__rocm-7_2__fa1__longctx32768.log | 0 ...4-00001-of-00003__rocm-7_2__hblt0__fa1.log | 0 ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...4-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm-7_2__fa1.log | 0 ...20b-mxfp4__rocm-7_2__fa1__longctx32768.log | 0 ...pt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log | 0 ...p4__rocm-7_2__hblt0__fa1__longctx32768.log | 0 .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 8 + ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 8 + ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 0 ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 8 + ...p4__rocm7-nightlies__fa1__longctx32768.log | 8 + ...20b-mxfp4__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 8 + ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 8 + ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm-7_2__fa1.log | 0 ...2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log | 0 .../llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log | 0 ..._0__rocm-7_2__hblt0__fa1__longctx32768.log | 0 .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 8 + ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 8 + ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 0 ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 8 + ..._0__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 8 + ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 8 + ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 8 + benchmark/results/29-03-2026/system_info.json | 1 + ...4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log | 8 + ...f-00002__rocm-7_2_1__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 4 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 6 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 6 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 6 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 38 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 36 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 4 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 6 +- ...h-BF16-00001-of-00002__rocm-7_2_1__fa1.log | 8 + ...f-00002__rocm-7_2_1__fa1__longctx32768.log | 8 + ...ash-BF16-00001-of-00002__rocm-7_2__fa1.log | 6 - ...sh-BF16-00001-of-00002__rocm6_4_4__fa1.log | 6 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 6 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 6 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 6 +- ...-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...M-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log | 6 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 6 +- ...Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 6 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 6 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 37 +- ...4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log | 6 +- ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 6 +- ...4B-Instruct-2512-BF16__rocm-7_2_1__fa1.log | 8 + ...12-BF16__rocm-7_2_1__fa1__longctx32768.log | 8 + ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 4 +- ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 4 +- ...struct-2512-BF16__rocm7-nightlies__fa1.log | 6 +- ...16__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 36 +- ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 36 +- ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 4 +- ...2-BF16__vulkan_radv__fa1__longctx32768.log | 6 +- ...4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log | 8 + ...f-00003__rocm-7_2_1__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 6 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 6 +- ...L-00001-of-00003__rocm7-nightlies__fa1.log | 6 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 6 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 6 +- ...B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log | 8 + ...-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log | 8 + ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 6 +- ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 6 +- ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 6 +- ..._M__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 6 +- ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 6 +- ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 6 +- ...5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log | 8 + ...f-00003__rocm-7_2_1__fa1__longctx32768.log | 8 + ...Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 6 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 6 +- ...L-00001-of-00003__rocm7-nightlies__fa1.log | 6 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 6 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 4 +- ...B-BF16-00001-of-00002__rocm-7_2_1__fa1.log | 8 + ...f-00002__rocm-7_2_1__fa1__longctx32768.log | 8 + ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 6 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 6 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 6 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 4 +- ....5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log | 6 +- ...-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log | 6 +- ...B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log | 6 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log | 6 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ...5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log | 6 +- ...4_K_XL__vulkan_radv__fa1__longctx32768.log | 6 +- ....5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...t-BF16-00001-of-00002__rocm-7_2_1__fa1.log | 8 + ...f-00002__rocm-7_2_1__fa1__longctx32768.log | 8 + ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...6B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log | 8 + ...4_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...6B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...t-BF16-00001-of-00002__rocm-7_2_1__fa1.log | 8 + ...f-00002__rocm-7_2_1__fa1__longctx32768.log | 8 + ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 23 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 23 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...a-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...ma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...1B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 23 + ...-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log | 8 + ...4_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...a-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log | 8 + ...Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log | 8 + ...ma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...1B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 23 + ...-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log | 8 + ...f-00003__rocm-7_2_1__fa1__longctx32768.log | 8 + ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 6 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 6 +- ...4-00001-of-00003__rocm7-nightlies__fa1.log | 6 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 6 +- ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 6 +- ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 6 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 6 +- .../gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log | 8 + ...b-mxfp4__rocm-7_2_1__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 6 +- ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 6 +- ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 6 +- ...p4__rocm7-nightlies__fa1__longctx32768.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 6 +- ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 6 +- .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 6 +- ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 6 +- .../llama-2-7b.Q4_0__rocm-7_2_1__fa1.log | 8 + ...7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 6 +- ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 6 +- .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 6 +- ..._0__rocm7-nightlies__fa1__longctx32768.log | 4 +- .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 6 +- ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 6 +- .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 6 +- ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 6 +- benchmark/results/system_info.json | 2 +- docs/results.json | 18999 +++++----------- 514 files changed, 7863 insertions(+), 13522 deletions(-) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 29-03-2026}/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log create mode 100644 benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/29-03-2026/system_info.json create mode 100644 benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..1e98d22 --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.13 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.76 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..8999b32 --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.56 ± 0.07 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..29499ed --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 80.49 ± 0.14 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..dedaf41 --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.15 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..4540fd9 --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,24 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x3c25) [0x7f8003c3bc25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8003c3bfeb] +/lib64/libggml-base.so.0(+0x16669) [0x7f8003c4e669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f80033b2bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f800339cd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f80033b2ea8] +/lib64/libggml-vulkan.so.0(+0x14f76) [0x7f8003cf6f76] +/lib64/libggml-vulkan.so.0(+0x13597f) [0x7f8003e1797f] +/lib64/libggml-vulkan.so.0(+0x136411) [0x7f8003e18411] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f8003c574d3] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f800756ce70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f800756f445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8007575aaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f800757742e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f80030835b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8003083668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..8d7f3ff --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x3c25) [0x7f34c7070c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f34c7070feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f34c7083669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f34c67e7bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f34c67d1d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f34c67e7ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f34c712db68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f34c7213fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f34c708c092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f34ca9a1e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f34ca9a4445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f34ca9aaaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f34ca9ac42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f34c64b85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f34c64b8668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..1c52acf --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.20 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..eb50e64 --- /dev/null +++ b/benchmark/results/29-03-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.39 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.60 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..8bc6dfa --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 435.72 ± 2.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.38 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..f8af5b7 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.19 ± 1.92 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.36 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..431595d --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.74 ± 0.05 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.06 ± 0.18 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..3da36d8 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 476.23 ± 2.21 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.36 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..8a819b6 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.79 ± 0.15 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.32 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..1400856 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.88 ± 0.15 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..0c10034 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.15 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.05 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..2c91e29 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 342.11 ± 5.47 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.48 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..d0eb79f --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.93 ± 0.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..5f26656 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.39 ± 1.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 31.04 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..8dc2481 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.84 ± 0.23 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.33 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..37c4d87 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 902.85 ± 59.94 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.16 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..d4a1f68 --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.75 ± 0.31 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.59 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..fcd1fbe --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.98 ± 1.29 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.01 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..e9e723c --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x3c25) [0x7fa3d16b4c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa3d16b4feb] +/lib64/libggml-base.so.0(+0x16669) [0x7fa3d16c7669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa3d0e2bbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa3d0e15d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fa3d0e2bea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7fa3d1771b68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7fa3d1857fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa3d16d0092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa3d4fe5e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa3d4fe8445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa3d4feeaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fa3d4ff042e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x419a10] +/lib64/libc.so.6(+0x35b5) [0x7fa3d0afc5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa3d0afc668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..c84fe4a --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 853.46 ± 6.70 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.38 ± 1.22 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..3b8a27e --- /dev/null +++ b/benchmark/results/29-03-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 133.27 ± 0.03 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.17 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log new file mode 100644 index 0000000..aaf2ed8 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 706.16 ± 3.76 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f03361f --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 1.47 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..4bfad54 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 881.45 ± 3.04 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..e15c3d6 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.50 ± 0.70 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..47e06e4 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x3c25) [0x7f9947881c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f9947881feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f9947894669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f9946ff8bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f9946fe2d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f9946ff8ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f994793eb68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f9947a24fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f994789d092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f994b1b2e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f994b1b5445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f994b1bbaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f994b1bd42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x419a10] +/lib64/libc.so.6(+0x35b5) [0x7f9946cc95b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9946cc9668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..59b5b4b --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x3c25) [0x7f4bc4c80c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4bc4c80feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f4bc4c93669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4bc43f7bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4bc43e1d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f4bc43f7ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f4bc4d3db68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f4bc4e23fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4bc4c9c092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4bc85b1e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4bc85b4445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4bc85baaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f4bc85bc42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f4bc40c85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4bc40c8668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log new file mode 100644 index 0000000..8311fc9 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 166.51 ± 0.62 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.94 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..645c378 --- /dev/null +++ b/benchmark/results/29-03-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.65 ± 0.24 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.70 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..c181684 --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 260.98 ± 1.56 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.60 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..6d79881 --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.69 ± 0.27 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.38 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..2b76326 --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 228.50 ± 1.06 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..39e5779 --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.43 ± 0.38 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ecc3dbc --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 140.84 ± 0.27 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 13.97 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..fe06b04 --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.21 ± 0.16 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.21 ± 0.32 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..cd213bb --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 190.66 ± 7.11 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.41 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4618d0c --- /dev/null +++ b/benchmark/results/29-03-2026/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 191.12 ± 0.46 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.85 ± 0.26 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log new file mode 100644 index 0000000..9642b43 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1170.01 ± 7.53 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.12 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..ad3e381 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.28 ± 0.14 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..12481a0 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1178.27 ± 10.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.46 ± 0.62 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..95d24a2 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.14 ± 0.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.83 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b1b4b06 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.98 ± 3.84 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.84 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..0e42549 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.65 ± 0.08 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.03 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log new file mode 100644 index 0000000..3b18735 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1290.50 ± 7.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.79 ± 0.18 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..253d842 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.26 ± 0.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.04 ± 0.04 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..83fa89a --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 311.68 ± 1.84 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.77 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..995277c --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 273.72 ± 1.22 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0bb4c16 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.27 ± 4.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.66 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..482822d --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 235.12 ± 5.24 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.36 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..2aa7628 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 183.05 ± 1.84 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.31 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9159adc --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.85 ± 0.15 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.76 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..ebcc2cb --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 239.56 ± 7.45 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.68 ± 0.38 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..ba5992f --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 203.34 ± 0.47 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.09 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..955738e --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 544.11 ± 3.06 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.40 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..19940ae --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 407.19 ± 1.96 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..3151c98 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 11.33 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..25444a3 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.64 ± 9.63 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.18 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ec9c2e2 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.56 ± 0.40 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.56 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..af1901a --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.32 ± 0.17 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.95 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..d914309 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 309.96 ± 4.20 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.79 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..a17d6c3 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.85 ± 0.77 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.39 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..77bc2b3 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1079.44 ± 6.76 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.46 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..28cdd02 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 762.29 ± 2.68 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.46 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..f34ef08 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1092.86 ± 9.42 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.16 ± 0.30 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ca64073 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.39 ± 2.00 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.90 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a947901 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 661.63 ± 3.14 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.16 ± 0.08 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..dcdc814 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 288.86 ± 0.53 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 44.24 ± 0.09 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..bc44011 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.40 ± 39.22 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.13 ± 0.07 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..2945779 --- /dev/null +++ b/benchmark/results/29-03-2026/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.55 ± 0.64 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 48.93 ± 0.13 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..492fecb --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.44 ± 6.33 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.85 ± 0.04 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c6c534c --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 442.64 ± 0.84 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.69 ± 0.34 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..d70f3d4 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 596.69 ± 97.42 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.38 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..9670ec2 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.13 ± 1.66 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.43 ± 8.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..e8675e6 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 576.81 ± 2.43 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.18 ± 0.04 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..315ed48 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 213.74 ± 0.68 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.52 ± 0.07 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..c0ad41d --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 633.21 ± 13.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.15 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..fae75b7 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 287.49 ± 1.21 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.67 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a7cf323 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1691.01 ± 16.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.07 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..45bb062 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.11 ± 1.16 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.02 ± 0.07 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..932a36a --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.50 ± 13.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.68 ± 0.10 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..65c4caa --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.77 ± 2.81 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.46 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..fa14b74 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1303.99 ± 6.84 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 73.68 ± 0.08 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..99d5ac7 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 364.73 ± 0.33 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.99 ± 0.21 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log new file mode 100644 index 0000000..917f775 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1577.96 ± 12.64 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 78.94 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..97ddca5 --- /dev/null +++ b/benchmark/results/29-03-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 541.12 ± 0.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.77 ± 0.08 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log new file mode 100644 index 0000000..5103c84 --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1528.37 ± 6.40 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.14 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..e0b84ff --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.35 ± 2.62 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..6e1ba6a --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1507.68 ± 1.23 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.14 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c083ed2 --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.97 ± 1.96 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/29-03-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a1c3881 --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 324.82 ± 0.45 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.43 ± 0.14 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..0c19fe3 --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 144.29 ± 1.09 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log new file mode 100644 index 0000000..f0e5a2f --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1313.97 ± 1.29 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..50c5ec7 --- /dev/null +++ b/benchmark/results/29-03-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.18 ± 2.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.52 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/29-03-2026/system_info.json b/benchmark/results/29-03-2026/system_info.json new file mode 100644 index 0000000..2efeb7c --- /dev/null +++ b/benchmark/results/29-03-2026/system_info.json @@ -0,0 +1 @@ +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "29 Mar 2026"} diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..2e9926a --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.36 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..488e9b3 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.67 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 1e98d22..f2036b6 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.13 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.40 ± 0.10 | | llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.76 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 8999b32..d77eb38 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.56 ± 0.07 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.57 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index 29499ed..2a7f425 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 80.49 ± 0.14 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.06 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.95 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index dedaf41..b6b5178 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.15 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 18.34 ± 0.14 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.06 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 4540fd9..7e0aed4 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,24 +1,24 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f8003c3bc25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8003c3bfeb] -/lib64/libggml-base.so.0(+0x16669) [0x7f8003c4e669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f80033b2bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f800339cd3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f80033b2ea8] -/lib64/libggml-vulkan.so.0(+0x14f76) [0x7f8003cf6f76] -/lib64/libggml-vulkan.so.0(+0x13597f) [0x7f8003e1797f] -/lib64/libggml-vulkan.so.0(+0x136411) [0x7f8003e18411] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f8003c574d3] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f800756ce70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f800756f445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8007575aaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f800757742e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f80030835b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8003083668] -/usr/sbin/llama-bench() [0x41b595] +/lib64/libggml-base.so.0(+0x4465) [0x7f656fe92465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f656fe9282b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f656fea4ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f656f608bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f656f5f2d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f656f608ea8] +/lib64/libggml-vulkan.so.0(+0x157c0) [0x7f656ff607c0] +/lib64/libggml-vulkan.so.0(+0x13fa8f) [0x7f657008aa8f] +/lib64/libggml-vulkan.so.0(+0x140521) [0x7f657008b521] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f656feae553] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6573ac9530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f6573acbb05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f6573ad226f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f6573ad3bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416c49] +/lib64/libc.so.6(+0x35b5) [0x7f656f2d95b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f656f2d9668] +/usr/sbin/llama-bench() [0x4189d5] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 8d7f3ff..1ff0669 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f34c7070c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f34c7070feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f34c7083669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f34c67e7bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f34c67d1d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f34c67e7ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f34c712db68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f34c7213fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f34c708c092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f34ca9a1e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f34ca9a4445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f34ca9aaaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f34ca9ac42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f34c64b85b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f34c64b8668] -/usr/sbin/llama-bench() [0x41b595] +/lib64/libggml-base.so.0(+0x4465) [0x7f178fa50465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f178fa5082b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f178fa62ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f178f1c6bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f178f1b0d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f178f1c6ea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f178fb203b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f178fc105d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f178fa6c112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1793687530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f1793689b05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f179369026f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f1793691bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416c49] +/lib64/libc.so.6(+0x35b5) [0x7f178ee975b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f178ee97668] +/usr/sbin/llama-bench() [0x4189d5] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 1c52acf..b4ac940 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.20 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 46.91 ± 0.02 | | llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index eb50e64..cc9c080 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.39 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.60 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.32 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.59 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..ac65f9e --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 404.18 ± 1.94 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 20.88 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..67fbf08 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 88.16 ± 0.08 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.07 ± 0.20 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log index 8bc6dfa..d1f71ff 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,2 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 435.72 ± 2.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.38 ± 0.00 | - -build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log index f8af5b7..9250782 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.19 ± 1.92 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.36 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 392.81 ± 1.96 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 18.97 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 431595d..4662794 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.74 ± 0.05 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.06 ± 0.18 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.55 ± 0.17 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.89 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 3da36d8..5735731 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 476.23 ± 2.21 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.36 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 400.57 ± 4.10 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.39 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 8a819b6..ef19e5d 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.79 ± 0.15 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.32 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.01 ± 0.04 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.86 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 1400856..e2f4794 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.88 ± 0.15 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.94 ± 0.15 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 0c10034..2dc96f2 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.15 ± 0.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.05 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.14 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.04 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log index 2c91e29..a0c45c2 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 342.11 ± 5.47 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.48 ± 0.02 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 326.36 ± 4.62 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.50 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index d0eb79f..8e29b1a 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.93 ± 0.11 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.01 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.00 ± 0.06 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..bf4287e --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 914.78 ± 3.38 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.73 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..20f5d80 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.49 ± 0.75 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.47 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log index 5f26656..45a93b4 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.39 ± 1.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 31.04 ± 0.01 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.01 ± 2.32 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 30.77 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 8dc2481..84cc9c5 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.84 ± 0.23 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.33 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.70 ± 0.23 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.93 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 37c4d87..a2cc15a 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 902.85 ± 59.94 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.16 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 888.70 ± 38.05 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.81 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index d4a1f68..ac18d67 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.75 ± 0.31 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.59 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.53 ± 0.09 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.47 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index fcd1fbe..57fea0d 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.98 ± 1.29 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.01 ± 0.01 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.74 ± 1.49 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.02 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index e9e723c..7615176 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,24 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7fa3d16b4c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa3d16b4feb] -/lib64/libggml-base.so.0(+0x16669) [0x7fa3d16c7669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa3d0e2bbfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa3d0e15d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7fa3d0e2bea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7fa3d1771b68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7fa3d1857fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa3d16d0092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa3d4fe5e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa3d4fe8445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa3d4feeaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7fa3d4ff042e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x419a10] -/lib64/libc.so.6(+0x35b5) [0x7fa3d0afc5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa3d0afc668] -/usr/sbin/llama-bench() [0x41b595] +/lib64/libggml-base.so.0(+0x4465) [0x7f03039c2465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f03039c282b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f03039d4ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f0303138bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f0303122d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f0303138ea8] +/lib64/libggml-vulkan.so.0(+0x157c0) [0x7f0303a907c0] +/lib64/libggml-vulkan.so.0(+0x13f96a) [0x7f0303bba96a] +/lib64/libggml-vulkan.so.0(+0x140521) [0x7f0303bbb521] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f03039de553] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f03075f9530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f03075fbb05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f030760226f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f0307603bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416ec6] +/lib64/libc.so.6(+0x35b5) [0x7f0302e095b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0302e09668] +/usr/sbin/llama-bench() [0x4189d5] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log index c84fe4a..fd4bd04 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 853.46 ± 6.70 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.38 ± 1.22 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 875.80 ± 2.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.19 ± 1.39 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 3b8a27e..df14c95 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 133.27 ± 0.03 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.17 ± 0.01 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 134.10 ± 0.07 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.18 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..8317da0 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 747.81 ± 7.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..783e117 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.77 ± 1.55 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log index aaf2ed8..7303ffc 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 706.16 ± 3.76 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 716.84 ± 2.31 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log index f03361f..644eac3 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 1.47 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.12 ± 0.76 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log index 4bfad54..3609ee1 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 881.45 ± 3.04 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 726.92 ± 2.95 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.43 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log index e15c3d6..37b0832 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.50 ± 0.70 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.40 ± 0.45 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.07 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index 47e06e4..43d4db9 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f9947881c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f9947881feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f9947894669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f9946ff8bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f9946fe2d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f9946ff8ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f994793eb68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f9947a24fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f994789d092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f994b1b2e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f994b1b5445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f994b1bbaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f994b1bd42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x419a10] -/lib64/libc.so.6(+0x35b5) [0x7f9946cc95b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9946cc9668] -/usr/sbin/llama-bench() [0x41b595] +/lib64/libggml-base.so.0(+0x4465) [0x7f143de79465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f143de7982b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f143de8bed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f143d5efbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f143d5d9d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f143d5efea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f143df493b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f143e0395d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f143de95112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1441ab0530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f1441ab2b05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f1441ab926f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f1441ababee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416ec6] +/lib64/libc.so.6(+0x35b5) [0x7f143d2c05b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f143d2c0668] +/usr/sbin/llama-bench() [0x4189d5] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log index 59b5b4b..3fad042 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -1,23 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -/lib64/libggml-base.so.0(+0x3c25) [0x7f4bc4c80c25] -/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4bc4c80feb] -/lib64/libggml-base.so.0(+0x16669) [0x7f4bc4c93669] -/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4bc43f7bfc] -/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4bc43e1d3a] -/lib64/libstdc++.so.6(+0x1eea8) [0x7f4bc43f7ea8] -/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f4bc4d3db68] -/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f4bc4e23fd0] -/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4bc4c9c092] -/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4bc85b1e70] -/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4bc85b4445] -/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4bc85baaaf] -/lib64/libllama.so.0(llama_decode+0xe) [0x7f4bc85bc42e] -/usr/sbin/llama-bench() [0x41cc3b] -/usr/sbin/llama-bench() [0x41977f] -/lib64/libc.so.6(+0x35b5) [0x7f4bc40c85b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4bc40c8668] -/usr/sbin/llama-bench() [0x41b595] +/lib64/libggml-base.so.0(+0x4465) [0x7f4a99ca1465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4a99ca182b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f4a99cb3ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4a99417bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4a99401d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f4a99417ea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f4a99d713b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f4a99e615d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4a99cbd112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4a9d8d8530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4a9d8dab05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4a9d8e126f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f4a9d8e2bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416c49] +/lib64/libc.so.6(+0x35b5) [0x7f4a990e85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4a990e8668] +/usr/sbin/llama-bench() [0x4189d5] terminate called after throwing an instance of 'vk::DeviceLostError' what(): vk::Queue::submit: ErrorDeviceLost ✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index 8311fc9..22f733b 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 166.51 ± 0.62 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 164.20 ± 0.55 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.94 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log index 645c378..d95eba5 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.65 ± 0.24 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.70 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.41 ± 0.21 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.71 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..84442e0 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 264.16 ± 1.43 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.04 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..3ba1084 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 299.29 ± 0.40 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.59 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log index c181684..86597fe 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 260.98 ± 1.56 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.60 ± 0.00 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 261.51 ± 1.50 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.63 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 6d79881..7d5f443 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.69 ± 0.27 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.38 ± 0.01 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 323.83 ± 0.28 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index 2b76326..229bdfb 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 228.50 ± 1.06 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.00 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 232.54 ± 0.27 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.81 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 39e5779..0446b85 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.43 ± 0.38 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.15 ± 0.23 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.61 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index ecc3dbc..7fc882e 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 140.84 ± 0.27 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 13.97 ± 0.05 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 138.78 ± 0.58 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.45 ± 0.09 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index fe06b04..83f7a70 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.21 ± 0.16 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.21 ± 0.32 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.96 ± 0.02 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.66 ± 0.21 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log index cd213bb..412da30 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 190.66 ± 7.11 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.41 ± 0.03 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 191.97 ± 8.53 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.70 ± 0.35 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index 4618d0c..e3a63f3 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 191.12 ± 0.46 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.85 ± 0.26 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 185.90 ± 0.18 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.21 ± 0.38 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..9f9df90 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1148.88 ± 7.32 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.80 ± 0.73 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..569cf72 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.78 ± 0.05 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.46 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index 9642b43..5011d54 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1170.01 ± 7.53 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.12 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.77 ± 7.34 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.06 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log index ad3e381..724b2a2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.28 ± 0.14 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.69 ± 0.26 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.81 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log index 12481a0..acd091b 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1178.27 ± 10.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.46 ± 0.62 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.86 ± 2.21 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log index 95d24a2..c0601b2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.14 ± 0.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.83 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.21 ± 0.11 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.77 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index b1b4b06..f60bdd6 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.98 ± 3.84 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.84 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.68 ± 3.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.48 ± 0.06 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log index 0e42549..ec6b25c 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.65 ± 0.08 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.03 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.69 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.03 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index 3b18735..7d5e7d4 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1290.50 ± 7.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.79 ± 0.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1145.07 ± 8.85 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 82.16 ± 3.05 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log index 253d842..36ea50e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.26 ± 0.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.04 ± 0.04 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.38 ± 0.33 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.09 ± 0.05 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..c7315fb --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 312.22 ± 2.95 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.60 ± 0.06 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..b3d3697 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 250.19 ± 0.15 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.88 ± 1.13 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 83fa89a..23f86cc 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 311.68 ± 1.84 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.77 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 316.73 ± 1.27 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.86 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 995277c..4842808 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 273.72 ± 1.22 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.81 ± 1.62 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.90 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index 0bb4c16..dbdb4c2 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.27 ± 4.13 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.66 ± 0.00 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 318.17 ± 2.18 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.57 ± 0.14 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 482822d..5607dd4 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 235.12 ± 5.24 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.36 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.61 ± 0.83 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.32 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 2aa7628..f1a164d 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 183.05 ± 1.84 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.31 ± 0.00 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 182.98 ± 1.82 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 9159adc..c587a4f 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.85 ± 0.15 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.76 ± 0.02 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.84 ± 0.14 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.77 ± 0.04 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log index ebcc2cb..f531091 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 239.56 ± 7.45 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.68 ± 0.38 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 241.15 ± 7.43 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.75 ± 0.24 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index ba5992f..502caf7 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 203.34 ± 0.47 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 202.99 ± 0.41 | | qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.09 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..f236f09 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.12 ± 4.29 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.63 ± 0.12 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..c3274c3 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 390.80 ± 4.30 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.61 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index 955738e..ca09bdd 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 544.11 ± 3.06 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.40 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 545.20 ± 3.50 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.99 ± 0.05 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 19940ae..5911d61 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 407.19 ± 1.96 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 354.63 ± 1.42 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.65 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 3151c98..95db09a 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 11.33 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 512.14 ± 2.33 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.77 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 25444a3..75aff14 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.64 ± 9.63 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.18 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 383.01 ± 0.52 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.11 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index ec9c2e2..fbf9df3 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.56 ± 0.40 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.56 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.66 ± 0.28 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.56 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index af1901a..3ce022f 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.32 ± 0.17 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.95 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.36 ± 0.07 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.94 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index d914309..bb65d81 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 309.96 ± 4.20 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.79 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 317.93 ± 2.34 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.80 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index a17d6c3..fc29ee0 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.85 ± 0.77 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.29 ± 0.54 | | qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.39 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..6459dce --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1098.31 ± 7.28 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.89 ± 0.32 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..73fe55f --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 681.43 ± 0.74 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.19 ± 0.03 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log index 77bc2b3..1de1e9d 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1079.44 ± 6.76 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.46 ± 0.01 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1080.97 ± 5.37 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 45.98 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log index 28cdd02..5a170f1 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 762.29 ± 2.68 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.46 ± 0.02 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 611.53 ± 0.52 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.17 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log index f34ef08..730ba29 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1092.86 ± 9.42 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.16 ± 0.30 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1111.77 ± 11.14 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.94 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log index ca64073..a8b07e2 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.39 ± 2.00 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.90 ± 0.03 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 669.42 ± 1.32 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.96 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log index a947901..56a7d97 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 661.63 ± 3.14 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.16 ± 0.08 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 663.48 ± 2.94 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.17 ± 0.04 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log index dcdc814..ea9d147 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 288.86 ± 0.53 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 44.24 ± 0.09 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 289.32 ± 1.23 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 44.23 ± 0.08 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log index bc44011..38c9115 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.40 ± 39.22 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.13 ± 0.07 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.96 ± 39.65 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.11 ± 0.06 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log index 2945779..f00163a 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.55 ± 0.64 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 48.93 ± 0.13 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.76 ± 0.15 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.03 ± 0.14 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..4423708 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 703.33 ± 5.75 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 29.06 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..be600e8 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 489.36 ± 3.31 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.31 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..3d6dabe --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 698.12 ± 4.53 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 25.88 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..af70e44 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 615.35 ± 3.03 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.34 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..4db4f18 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 697.35 ± 3.46 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 28.10 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5a8312d --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 553.41 ± 1.44 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.57 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..6ee1f2c --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 415.49 ± 2.30 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 30.74 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..6b9aafa --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 228.90 ± 0.28 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.62 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..077f6d0 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 710.93 ± 18.62 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 28.92 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..8f81e29 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 531.15 ± 0.20 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.32 ± 0.03 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..4901fb5 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 864.78 ± 7.29 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.66 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..5e08a1b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 625.12 ± 11.57 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.99 ± 0.23 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..27617ae --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 839.13 ± 7.61 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 20.75 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..38d2b4a --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 646.67 ± 11.22 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.86 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..bdb11b0 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 844.42 ± 19.66 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..cf1a239 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.66 ± 5.96 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.11 ± 0.07 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..1a94edd --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 117.96 ± 0.28 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 16.18 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..62d20bc --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 58.94 ± 0.07 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.26 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..882cb85 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 376.52 ± 7.37 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 14.74 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..98e4674 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 351.87 ± 1.49 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.71 ± 0.06 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..e9d1e8e --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1298.78 ± 7.07 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.65 ± 0.23 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..fb7c672 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 691.44 ± 4.50 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.86 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..20ea166 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1288.38 ± 10.85 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 45.58 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..aad4700 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 721.84 ± 10.97 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.17 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1db2c43 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1219.74 ± 15.04 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.57 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..8f086cb --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 600.42 ± 7.55 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.88 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..0699b0b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 720.78 ± 2.94 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 50.21 ± 0.06 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..5ec6aae --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 100.79 ± 0.03 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 35.41 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..ef94402 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 1213.78 ± 7.36 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 53.04 ± 0.10 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..6ac9013 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 638.76 ± 1.76 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.93 ± 0.09 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..d2dfb99 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1226.22 ± 161.95 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.27 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..a49c884 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 667.32 ± 5.65 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.43 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..5fbd898 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1303.32 ± 8.77 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 38.81 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..84431bb --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 727.92 ± 8.62 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.52 ± 0.35 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..834db9b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1294.40 ± 31.54 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.09 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1203ea4 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 596.78 ± 7.98 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.35 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..c5a3b18 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 589.20 ± 1.79 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 43.18 ± 0.07 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9b69152 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.70 ± 0.10 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 31.82 ± 0.05 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..4ab7368 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp512 | 944.96 ± 19.24 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg128 | 43.58 ± 1.06 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..fd0cc70 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 579.88 ± 0.09 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 35.52 ± 0.07 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..84cd32e --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 381.31 ± 4.26 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.49 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..46917bc --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 151.84 ± 1.65 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..e55d3f2 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 360.84 ± 1.60 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.30 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..9885e65 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 154.86 ± 2.16 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.14 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1b9e740 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 373.84 ± 0.93 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.41 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1c1e2dd --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 139.53 ± 1.98 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.16 ± 0.05 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..2cf8a6a --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f2a944d5465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2a944d582b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f2a944e7ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f2a93c4bbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f2a93c35d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f2a93c4bea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f2a945a53b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f2a946955d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f2a944f1112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f2a9810c530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f2a9810eb05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f2a9811526f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f2a98116bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416ec6] +/lib64/libc.so.6(+0x35b5) [0x7f2a9391c5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2a9391c668] +/usr/sbin/llama-bench() [0x4189d5] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..3a53be1 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f7c4a4ab465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f7c4a4ab82b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f7c4a4bded9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f7c49c21bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f7c49c0bd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f7c49c21ea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f7c4a57b3b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f7c4a66b5d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f7c4a4c7112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f7c4e0e2530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f7c4e0e4b05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f7c4e0eb26f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f7c4e0ecbee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416c49] +/lib64/libc.so.6(+0x35b5) [0x7f7c498f25b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7c498f2668] +/usr/sbin/llama-bench() [0x4189d5] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..ac42bfc --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 79.53 ± 0.26 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 3.50 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..7b3e175 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 60.29 ± 0.37 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.24 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..89cb0ee --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 313.26 ± 1.19 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..63b8782 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 141.61 ± 1.71 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..66aac4c --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.22 ± 0.13 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.02 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..d7be7d7 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 143.91 ± 1.64 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.81 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..e3b7d2d --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 315.72 ± 0.30 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..dd44eae --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 130.80 ± 1.45 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.20 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..2e5a7d2 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 69.12 ± 0.02 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 10.71 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..4f6b526 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f0f8a452465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f0f8a45282b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f0f8a464ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f0f89bc8bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f0f89bb2d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f0f89bc8ea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f0f8a5223b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f0f8a6125d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f0f8a46e112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f0f8e089530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f0f8e08bb05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f0f8e09226f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f0f8e093bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416ec6] +/lib64/libc.so.6(+0x35b5) [0x7f0f898995b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0f89899668] +/usr/sbin/llama-bench() [0x4189d5] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-UD-Q4_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..a66892a --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 244.46 ± 0.27 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 11.02 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..67bf195 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 124.32 ± 1.04 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.78 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..93e42e7 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.13 ± 0.94 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..41d2e44 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.72 ± 1.33 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..4c792b4 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 303.40 ± 0.23 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.07 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..45ec95f --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 141.37 ± 1.98 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.18 ± 0.08 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0ef79f1 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 319.75 ± 0.26 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.12 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..513ed3b --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.00 ± 1.30 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.30 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..c3c9edf --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 72.26 ± 0.02 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 6.28 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..cc5da6c --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f8de9476465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8de947682b] +/lib64/libggml-base.so.0(+0x16ed9) [0x7f8de9488ed9] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f8de8becbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f8de8bd6d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f8de8becea8] +/lib64/libggml-vulkan.so.0(+0x173b2) [0x7f8de95463b2] +/lib64/libggml-vulkan.so.0(+0x1075d0) [0x7f8de96365d0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f8de9492112] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f8ded0ad530] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f8ded0afb05] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8ded0b626f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f8ded0b7bee] +/usr/sbin/llama-bench() [0x41a1bb] +/usr/sbin/llama-bench() [0x416ec6] +/lib64/libc.so.6(+0x35b5) [0x7f8de88bd5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8de88bd668] +/usr/sbin/llama-bench() [0x4189d5] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..9f5b317 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp512 | 209.20 ± 6.85 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg128 | 6.28 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..6b46536 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 112.72 ± 1.26 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.49 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..3be07b2 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 633.00 ± 7.37 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.36 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..3898436 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 296.45 ± 1.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.19 ± 8.13 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index 492fecb..58b8b3f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.44 ± 6.33 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.85 ± 0.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.77 ± 4.37 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.76 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index c6c534c..fc17a5b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 442.64 ± 0.84 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.69 ± 0.34 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.58 ± 0.57 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.77 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log index d70f3d4..e805b34 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 596.69 ± 97.42 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.38 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 641.05 ± 2.79 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.73 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 9670ec2..3360e12 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.13 ± 1.66 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.43 ± 8.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 305.81 ± 0.24 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.03 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index e8675e6..ccde36b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 576.81 ± 2.43 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.18 ± 0.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 575.01 ± 3.11 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.20 ± 0.06 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 315ed48..bf15ed0 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 213.74 ± 0.68 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.52 ± 0.07 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 214.22 ± 0.50 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.54 ± 0.04 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index c0ad41d..7dfab3a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 633.21 ± 13.06 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.15 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 635.12 ± 4.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.31 ± 0.08 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log index fae75b7..b654b3f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 287.49 ± 1.21 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.67 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 290.19 ± 1.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.28 ± 0.06 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..efb2f9d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1631.41 ± 13.30 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.63 ± 0.10 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..42c242a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 489.21 ± 2.12 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.87 ± 0.03 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index a7cf323..5d0098e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1691.01 ± 16.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.07 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1694.78 ± 17.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.19 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log index 45bb062..293a0cb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.11 ± 1.16 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.02 ± 0.07 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.43 ± 1.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.22 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log index 932a36a..fa84182 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.50 ± 13.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.68 ± 0.10 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1635.73 ± 10.16 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.47 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log index 65c4caa..1c1adcb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.77 ± 2.81 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.46 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 493.84 ± 1.85 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 52.05 ± 0.04 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index fa14b74..a2ddf3c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1303.99 ± 6.84 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 73.68 ± 0.08 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1306.44 ± 9.57 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 73.63 ± 0.06 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log index 99d5ac7..b322708 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 364.73 ± 0.33 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.99 ± 0.21 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 365.33 ± 0.21 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.83 ± 0.07 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 917f775..795bbaa 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1577.96 ± 12.64 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 78.94 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1574.45 ± 16.63 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.03 ± 0.17 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log index 97ddca5..c1e124d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 541.12 ± 0.20 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.77 ± 0.08 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 545.65 ± 0.37 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.70 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..fbf458f --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1541.76 ± 1.17 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.16 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..a4ec636 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.73 ± 0.61 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 5103c84..ef866a8 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1528.37 ± 6.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.14 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.35 ± 2.98 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.78 ± 0.02 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log index e0b84ff..4667568 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.35 ± 2.62 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.44 ± 1.93 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.96 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log index 6e1ba6a..27dfff5 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1507.68 ± 1.23 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.14 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1513.21 ± 2.79 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.13 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log index c083ed2..182d7c6 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.97 ± 1.96 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 189.28 ± 0.40 | | llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index a1c3881..8d077eb 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 324.82 ± 0.45 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.43 ± 0.14 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 326.38 ± 0.44 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.44 ± 0.11 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log index 0c19fe3..2853f6a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 144.29 ± 1.09 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 145.14 ± 0.20 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.21 ± 0.00 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index f0e5a2f..68e0eb3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1313.97 ± 1.29 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.05 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1311.79 ± 0.38 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.03 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log index 50c5ec7..d0a49c1 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.18 ± 2.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.52 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 210.35 ± 0.60 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.53 ± 0.01 | -build: 2405d59cb (8577) +build: 3f8752b55 (8743) diff --git a/benchmark/results/system_info.json b/benchmark/results/system_info.json index 2efeb7c..d9ce179 100644 --- a/benchmark/results/system_info.json +++ b/benchmark/results/system_info.json @@ -1 +1 @@ -{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "29 Mar 2026"} +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "10 Apr 2026"} diff --git a/docs/results.json b/docs/results.json index ce60d51..ccecf06 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,16 +1,16 @@ { "meta": { - "generated_at": "2026-03-30T06:55:07Z", + "generated_at": "2026-04-11T10:18:28Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", - "timestamp": "29 Mar 2026" + "timestamp": "10 Apr 2026" }, "llamacpp_builds": [ { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" }, { "hash": "e0c93af2a", @@ -19,11 +19,9 @@ ], "environments": [ "rocm-7_2", - "rocm-7_2-hblt0", + "rocm-7_2_1", "rocm6_4_4", - "rocm6_4_4-hblt0", "rocm7-nightlies", - "rocm7-nightlies-hblt0", "vulkan_amdvlk", "vulkan_radv" ], @@ -33,15 +31,15 @@ { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 78.37, - "tps_std": 0.1, + "tps_mean": 78.36, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -51,24 +49,24 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.96, + "tps_mean": 2.99, "tps_std": 0.0, "error": false, "error_type": null, @@ -79,25 +77,25 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.07, - "tps_std": 0.06, + "tps_mean": 17.67, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -107,137 +105,25 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 78.3, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.08, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 2.08, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -247,11 +133,11 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -264,8 +150,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 78.37, - "tps_std": 0.13, + "tps_mean": 78.4, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -278,8 +164,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -306,8 +192,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -320,8 +206,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.56, - "tps_std": 0.07, + "tps_mean": 17.57, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -334,8 +220,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -349,7 +235,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 2.09, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -362,120 +248,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 78.63, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.58, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.08, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -488,63 +262,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 80.49, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 21.15, + "tps_mean": 78.06, "tps_std": 0.06, "error": false, "error_type": null, @@ -555,11 +273,67 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 18.34, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -572,7 +346,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.07, + "tps_mean": 2.06, "tps_std": 0.01, "error": false, "error_type": null, @@ -586,120 +360,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 78.97, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 21.17, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -762,8 +424,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 47.2, - "tps_std": 0.03, + "tps_mean": 46.91, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -776,8 +438,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -804,8 +466,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -818,8 +480,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 7.39, - "tps_std": 0.02, + "tps_mean": 7.32, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -832,8 +494,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -846,7 +508,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.6, + "tps_mean": 2.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -860,22 +522,22 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 435.72, - "tps_std": 2.11, + "tps_mean": 404.18, + "tps_std": 1.94, "error": false, "error_type": null, "backend": "ROCm", @@ -885,192 +547,24 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 87.93, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 403.83, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 87.49, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.33, + "tps_mean": 20.88, "tps_std": 0.01, "error": false, "error_type": null, @@ -1081,13 +575,94 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 88.16, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.07, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", + "rpc": false, + "build": null + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -1098,8 +673,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 393.19, - "tps_std": 1.92, + "tps_mean": 392.81, + "tps_std": 1.96, "error": false, "error_type": null, "backend": "ROCm", @@ -1112,8 +687,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1126,7 +701,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 19.36, + "tps_mean": 18.97, "tps_std": 0.0, "error": false, "error_type": null, @@ -1140,8 +715,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1154,8 +729,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 94.74, - "tps_std": 0.05, + "tps_mean": 94.55, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -1168,8 +743,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1182,8 +757,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 15.06, - "tps_std": 0.18, + "tps_mean": 14.89, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -1196,22 +771,22 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 393.15, - "tps_std": 2.0, + "tps_mean": 400.57, + "tps_std": 4.1, "error": false, "error_type": null, "backend": "ROCm", @@ -1221,24 +796,80 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.11, + "tps_mean": 19.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 86.01, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.86, "tps_std": 0.03, "error": false, "error_type": null, @@ -1249,291 +880,11 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 94.55, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 476.23, - "tps_std": 2.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 86.79, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 401.13, - "tps_std": 5.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 85.92, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.3, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1546,7 +897,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 114.88, + "tps_mean": 114.94, "tps_std": 0.15, "error": false, "error_type": null, @@ -1560,8 +911,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1575,7 +926,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 10.48, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -1588,8 +939,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1602,7 +953,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 10.15, + "tps_mean": 10.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -1616,8 +967,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1630,7 +981,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.05, + "tps_mean": 5.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -1644,8 +995,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1658,8 +1009,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 342.11, - "tps_std": 5.47, + "tps_mean": 326.36, + "tps_std": 4.62, "error": false, "error_type": null, "backend": "Vulkan", @@ -1672,8 +1023,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1686,7 +1037,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 9.48, + "tps_mean": 9.5, "tps_std": 0.02, "error": false, "error_type": null, @@ -1700,8 +1051,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1714,8 +1065,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 102.93, - "tps_std": 0.11, + "tps_mean": 102.0, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -1728,8 +1079,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1743,7 +1094,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 7.82, - "tps_std": 0.01, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -1756,22 +1107,22 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 905.79, - "tps_std": 2.84, + "tps_mean": 914.78, + "tps_std": 3.38, "error": false, "error_type": null, "backend": "ROCm", @@ -1781,24 +1132,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 32.96, + "tps_mean": 32.73, "tps_std": 0.0, "error": false, "error_type": null, @@ -1809,25 +1160,25 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 91.97, - "tps_std": 0.19, + "tps_mean": 91.49, + "tps_std": 0.75, "error": false, "error_type": null, "backend": "ROCm", @@ -1837,80 +1188,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.58, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 893.68, - "tps_std": 28.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 33.2, + "tps_mean": 20.47, "tps_std": 0.0, "error": false, "error_type": null, @@ -1921,67 +1216,11 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 91.83, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -1994,8 +1233,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 903.39, - "tps_std": 1.04, + "tps_mean": 903.01, + "tps_std": 2.32, "error": false, "error_type": null, "backend": "ROCm", @@ -2008,8 +1247,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2022,8 +1261,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 31.04, - "tps_std": 0.01, + "tps_mean": 30.77, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -2036,8 +1275,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2050,7 +1289,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 99.84, + "tps_mean": 99.7, "tps_std": 0.23, "error": false, "error_type": null, @@ -2064,8 +1303,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2078,7 +1317,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.33, + "tps_mean": 19.93, "tps_std": 0.0, "error": false, "error_type": null, @@ -2092,120 +1331,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 906.25, - "tps_std": 1.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 32.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 100.07, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2218,8 +1345,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 902.85, - "tps_std": 59.94, + "tps_mean": 888.7, + "tps_std": 38.05, "error": false, "error_type": null, "backend": "ROCm", @@ -2232,8 +1359,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2246,7 +1373,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 33.16, + "tps_mean": 32.81, "tps_std": 0.0, "error": false, "error_type": null, @@ -2260,8 +1387,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2274,8 +1401,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 89.75, - "tps_std": 0.31, + "tps_mean": 89.53, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "ROCm", @@ -2288,8 +1415,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2302,7 +1429,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.59, + "tps_mean": 20.47, "tps_std": 0.0, "error": false, "error_type": null, @@ -2316,120 +1443,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 906.42, - "tps_std": 2.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 33.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 89.99, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2442,8 +1457,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 499.98, - "tps_std": 1.29, + "tps_mean": 499.74, + "tps_std": 1.49, "error": false, "error_type": null, "backend": "Vulkan", @@ -2456,8 +1471,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2470,8 +1485,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 39.01, - "tps_std": 0.01, + "tps_mean": 39.02, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -2484,8 +1499,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2523,8 +1538,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 853.46, - "tps_std": 6.7, + "tps_mean": 875.8, + "tps_std": 2.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -2537,8 +1552,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2551,8 +1566,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 40.38, - "tps_std": 1.22, + "tps_mean": 40.19, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "Vulkan", @@ -2565,8 +1580,8 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2579,7 +1594,35 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 133.27, + "tps_mean": 134.1, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.18, "tps_std": 0.03, "error": false, "error_type": null, @@ -2593,50 +1636,22 @@ "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.17, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 673.71, - "tps_std": 1.42, + "tps_mean": 747.81, + "tps_std": 7.01, "error": false, "error_type": null, "backend": "ROCm", @@ -2646,131 +1661,19 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.35, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 160.3, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 748.37, - "tps_std": 4.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, @@ -2786,25 +1689,25 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 165.11, - "tps_std": 1.59, + "tps_mean": 182.77, + "tps_std": 1.55, "error": false, "error_type": null, "backend": "ROCm", @@ -2814,19 +1717,19 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -2842,11 +1745,11 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2859,8 +1762,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 706.16, - "tps_std": 3.76, + "tps_mean": 716.84, + "tps_std": 2.31, "error": false, "error_type": null, "backend": "ROCm", @@ -2873,8 +1776,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2901,8 +1804,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2915,8 +1818,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 166.61, - "tps_std": 1.47, + "tps_mean": 166.12, + "tps_std": 0.76, "error": false, "error_type": null, "backend": "ROCm", @@ -2929,8 +1832,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -2957,120 +1860,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 710.61, - "tps_std": 4.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 161.22, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3083,8 +1874,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 881.45, - "tps_std": 3.04, + "tps_mean": 726.92, + "tps_std": 2.95, "error": false, "error_type": null, "backend": "ROCm", @@ -3097,8 +1888,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3111,7 +1902,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 8.44, + "tps_mean": 8.43, "tps_std": 0.0, "error": false, "error_type": null, @@ -3125,8 +1916,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3139,8 +1930,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 213.5, - "tps_std": 0.7, + "tps_mean": 162.4, + "tps_std": 0.45, "error": false, "error_type": null, "backend": "ROCm", @@ -3153,8 +1944,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3167,7 +1958,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.08, + "tps_mean": 7.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -3181,120 +1972,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 735.98, - "tps_std": 5.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 205.29, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3357,8 +2036,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 166.51, - "tps_std": 0.62, + "tps_mean": 164.2, + "tps_std": 0.55, "error": false, "error_type": null, "backend": "Vulkan", @@ -3371,8 +2050,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3399,8 +2078,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3413,8 +2092,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 72.65, - "tps_std": 0.24, + "tps_mean": 72.41, + "tps_std": 0.21, "error": false, "error_type": null, "backend": "Vulkan", @@ -3427,8 +2106,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3441,7 +2120,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.7, + "tps_mean": 6.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -3455,22 +2134,22 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 259.71, - "tps_std": 1.51, + "tps_mean": 264.16, + "tps_std": 1.43, "error": false, "error_type": null, "backend": "ROCm", @@ -3480,25 +2159,25 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.74, - "tps_std": 0.0, + "tps_mean": 16.01, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -3508,25 +2187,25 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 302.29, - "tps_std": 0.44, + "tps_mean": 299.29, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "ROCm", @@ -3536,131 +2215,19 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 263.95, - "tps_std": 1.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 305.52, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -3676,11 +2243,11 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3693,8 +2260,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 260.98, - "tps_std": 1.56, + "tps_mean": 261.51, + "tps_std": 1.5, "error": false, "error_type": null, "backend": "ROCm", @@ -3707,8 +2274,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3721,7 +2288,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.6, + "tps_mean": 15.63, "tps_std": 0.0, "error": false, "error_type": null, @@ -3735,8 +2302,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3749,8 +2316,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 324.69, - "tps_std": 0.27, + "tps_mean": 323.83, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "ROCm", @@ -3763,8 +2330,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3777,7 +2344,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 15.38, + "tps_mean": 15.34, "tps_std": 0.01, "error": false, "error_type": null, @@ -3791,120 +2358,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 262.3, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 324.11, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.45, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -3917,246 +2372,106 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 228.5, - "tps_std": 1.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 284.43, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 236.84, - "tps_std": 1.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.04, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 290.04, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 140.84, + "tps_mean": 232.54, "tps_std": 0.27, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 120.67, "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 284.15, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.34, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" } }, { @@ -4168,9 +2483,9 @@ "fa": true, "context": "default", "context_tokens": null, - "test": "tg128", - "tps_mean": 13.97, - "tps_std": 0.05, + "test": "pp512", + "tps_mean": 138.78, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "Vulkan", @@ -4183,8 +2498,36 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.45, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" } }, { @@ -4197,8 +2540,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 106.21, - "tps_std": 0.16, + "tps_mean": 104.96, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -4211,8 +2554,8 @@ "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -4225,2239 +2568,335 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 13.21, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 190.66, - "tps_std": 7.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.41, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 191.12, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 13.85, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 194.62, - "tps_std": 1.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.93, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.41, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 194.32, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.8, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.97, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.63, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 197.89, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.96, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 70.32, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.22, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 198.04, - "tps_std": 1.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 13.11, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 70.27, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.02, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 197.46, - "tps_std": 1.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 48.93, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.34, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 194.0, - "tps_std": 1.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.33, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.46, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 106.76, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 26.54, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 158.81, - "tps_std": 2.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 17.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 28.25, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.49, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 474.42, - "tps_std": 2.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 189.58, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 479.78, - "tps_std": 2.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 180.26, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 556.96, - "tps_std": 2.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 238.1, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 556.8, - "tps_std": 4.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 237.85, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.46, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 503.77, - "tps_std": 3.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 182.59, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.25, - "tps_std": 1.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 482.99, - "tps_std": 0.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 175.78, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 171.39, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 73.62, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 369.06, - "tps_std": 2.58, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 9.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 140.81, - "tps_std": 0.52, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 994.33, - "tps_std": 11.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 55.87, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 209.15, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.88, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 994.22, - "tps_std": 10.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 55.84, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 208.93, + "tps_mean": 13.66, "tps_std": 0.21, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1009.37, - "tps_std": 9.64, + "tps_mean": 191.97, + "tps_std": 8.53, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 53.13, - "tps_std": 0.02, + "tps_mean": 14.7, + "tps_std": 0.35, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 285.49, + "tps_mean": 185.9, "tps_std": 0.18, "error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.21, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1148.88, + "tps_std": 7.32, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 24.53, + "file_size_gib": 17.35, "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 67.8, + "tps_std": 0.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 194.78, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.46, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1167.77, + "tps_std": 7.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 65.06, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 291.69, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -6465,7 +2904,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 30.53, + "tps_mean": 32.81, "tps_std": 0.02, "error": false, "error_type": null, @@ -6473,55 +2912,55 @@ "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 24.53, + "file_size_gib": 17.35, "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1012.69, - "tps_std": 9.02, + "tps_mean": 1185.86, + "tps_std": 2.21, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 24.53, + "file_size_gib": 17.35, "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 54.94, + "tps_mean": 68.95, "tps_std": 0.01, "error": false, "error_type": null, @@ -6529,131 +2968,19 @@ "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 24.53, + "file_size_gib": 17.35, "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 285.25, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.5, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1023.24, - "tps_std": 11.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 56.92, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -6661,906 +2988,10 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 206.37, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.69, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1012.11, - "tps_std": 7.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 56.92, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 205.56, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.62, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 828.53, - "tps_std": 4.66, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 63.31, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 110.5, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.87, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1145.66, - "tps_std": 9.68, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.15, + "tps_mean": 203.21, "tps_std": 0.11, "error": false, "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 193.03, - "tps_std": 0.86, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.02, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1149.31, - "tps_std": 13.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 67.72, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 212.38, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.49, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1149.83, - "tps_std": 7.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.05, - "tps_std": 0.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 212.45, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.52, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1170.01, - "tps_std": 7.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 65.12, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 291.28, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.65, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1167.76, - "tps_std": 7.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 64.97, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 290.12, - "tps_std": 2.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.65, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1178.27, - "tps_std": 10.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.46, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 209.14, - "tps_std": 0.1, - "error": false, - "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -7571,8 +3002,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7585,7 +3016,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.83, + "tps_mean": 34.77, "tps_std": 0.02, "error": false, "error_type": null, @@ -7599,120 +3030,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1172.23, - "tps_std": 12.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.65, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 209.05, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.76, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7725,8 +3044,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 882.98, - "tps_std": 3.84, + "tps_mean": 882.68, + "tps_std": 3.83, "error": false, "error_type": null, "backend": "Vulkan", @@ -7739,8 +3058,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7753,7 +3072,35 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 80.84, + "tps_mean": 80.48, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 111.69, "tps_std": 0.03, "error": false, "error_type": null, @@ -7764,39 +3111,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 111.65, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7810,7 +3129,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 28.03, - "tps_std": 0.02, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -7823,8 +3142,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7837,8 +3156,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1290.5, - "tps_std": 7.83, + "tps_mean": 1145.07, + "tps_std": 8.85, "error": false, "error_type": null, "backend": "Vulkan", @@ -7851,8 +3170,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7865,8 +3184,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 83.79, - "tps_std": 0.18, + "tps_mean": 82.16, + "tps_std": 3.05, "error": false, "error_type": null, "backend": "Vulkan", @@ -7879,8 +3198,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7893,8 +3212,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 194.26, - "tps_std": 0.86, + "tps_mean": 194.38, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "Vulkan", @@ -7907,8 +3226,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -7921,8 +3240,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 37.04, - "tps_std": 0.04, + "tps_mean": 37.09, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", @@ -7935,1816 +3254,52 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 613.58, - "tps_std": 2.84, + "tps_mean": 312.22, + "tps_std": 2.95, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 29.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 447.94, - "tps_std": 2.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 583.48, - "tps_std": 105.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 439.93, - "tps_std": 32.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 627.46, - "tps_std": 3.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.02, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 527.78, - "tps_std": 1.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 623.64, - "tps_std": 17.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.16, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 512.14, - "tps_std": 3.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 622.47, - "tps_std": 8.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 443.0, - "tps_std": 30.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.97, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 600.08, - "tps_std": 13.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 457.86, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.08, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 396.36, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 30.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 208.44, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.08, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 500.88, - "tps_std": 3.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 420.42, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", - "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.05, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.94, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 629.03, - "tps_std": 5.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.37, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 439.65, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.25, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 648.58, - "tps_std": 5.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 30.86, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 457.6, - "tps_std": 1.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 634.72, - "tps_std": 6.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 511.83, - "tps_std": 7.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.45, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 632.78, - "tps_std": 59.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 30.53, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 507.17, - "tps_std": 2.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.44, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 621.13, - "tps_std": 82.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 467.56, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.26, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 661.07, - "tps_std": 5.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 432.86, - "tps_std": 0.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.2, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 436.5, - "tps_std": 7.59, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 34.72, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 218.02, - "tps_std": 0.85, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.5, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 587.86, - "tps_std": 37.36, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 36.28, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 453.76, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.67, + "tps_mean": 19.6, "tps_std": 0.06, "error": false, "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 305.11, - "tps_std": 1.38, - "error": false, - "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -9752,727 +3307,363 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 246.59, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 313.18, - "tps_std": 2.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 243.74, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 311.68, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.77, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 273.72, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 306.06, - "tps_std": 2.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.25, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 270.84, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 314.27, - "tps_std": 4.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 235.12, - "tps_std": 5.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.36, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 310.73, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.61, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 241.56, - "tps_std": 5.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 183.05, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 102.85, + "tps_mean": 250.19, "tps_std": 0.15, "error": false, "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.88, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 316.73, + "tps_std": 1.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.86, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 275.81, + "tps_std": 1.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 17.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 318.17, + "tps_std": 2.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.57, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 223.61, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.32, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 182.98, + "tps_std": 1.82, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 102.84, + "tps_std": 0.14, + "error": false, + "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -10483,8 +3674,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10497,8 +3688,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 18.76, - "tps_std": 0.02, + "tps_mean": 18.77, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -10511,8 +3702,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10525,8 +3716,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 239.56, - "tps_std": 7.45, + "tps_mean": 241.15, + "tps_std": 7.43, "error": false, "error_type": null, "backend": "Vulkan", @@ -10539,8 +3730,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10553,8 +3744,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.68, - "tps_std": 0.38, + "tps_mean": 21.75, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "Vulkan", @@ -10567,8 +3758,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10581,8 +3772,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 203.34, - "tps_std": 0.47, + "tps_mean": 202.99, + "tps_std": 0.41, "error": false, "error_type": null, "backend": "Vulkan", @@ -10595,8 +3786,8 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10623,22 +3814,22 @@ "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 508.43, - "tps_std": 3.18, + "tps_mean": 509.12, + "tps_std": 4.29, "error": false, "error_type": null, "backend": "ROCm", @@ -10648,24 +3839,80 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.65, + "tps_mean": 23.63, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 390.8, + "tps_std": 4.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.61, "tps_std": 0.0, "error": false, "error_type": null, @@ -10676,179 +3923,11 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 423.84, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 512.17, - "tps_std": 3.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 382.51, - "tps_std": 1.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.63, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10861,8 +3940,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 544.11, - "tps_std": 3.06, + "tps_mean": 545.2, + "tps_std": 3.5, "error": false, "error_type": null, "backend": "ROCm", @@ -10875,8 +3954,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10889,8 +3968,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.4, - "tps_std": 0.01, + "tps_mean": 21.99, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -10903,8 +3982,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10917,8 +3996,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 407.19, - "tps_std": 1.96, + "tps_mean": 354.63, + "tps_std": 1.42, "error": false, "error_type": null, "backend": "ROCm", @@ -10931,8 +4010,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -10945,7 +4024,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 21.59, + "tps_mean": 21.65, "tps_std": 0.01, "error": false, "error_type": null, @@ -10959,120 +4038,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 550.28, - "tps_std": 3.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.23, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 405.13, - "tps_std": 1.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.72, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11085,8 +4052,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 541.57, - "tps_std": 11.33, + "tps_mean": 512.14, + "tps_std": 2.33, "error": false, "error_type": null, "backend": "ROCm", @@ -11099,8 +4066,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11113,7 +4080,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.69, + "tps_mean": 23.77, "tps_std": 0.0, "error": false, "error_type": null, @@ -11127,8 +4094,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11141,8 +4108,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 441.64, - "tps_std": 9.63, + "tps_mean": 383.01, + "tps_std": 0.52, "error": false, "error_type": null, "backend": "ROCm", @@ -11155,8 +4122,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11169,7 +4136,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 22.18, + "tps_mean": 22.11, "tps_std": 0.01, "error": false, "error_type": null, @@ -11183,120 +4150,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 509.57, - "tps_std": 8.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 379.36, - "tps_std": 1.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.2, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11309,8 +4164,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 122.56, - "tps_std": 0.4, + "tps_mean": 122.66, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "Vulkan", @@ -11323,8 +4178,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11338,7 +4193,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 11.56, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -11351,8 +4206,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11365,8 +4220,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 97.32, - "tps_std": 0.17, + "tps_mean": 97.36, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", @@ -11379,8 +4234,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11393,8 +4248,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 10.95, - "tps_std": 0.0, + "tps_mean": 10.94, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -11407,8 +4262,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11421,8 +4276,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 309.96, - "tps_std": 4.2, + "tps_mean": 317.93, + "tps_std": 2.34, "error": false, "error_type": null, "backend": "Vulkan", @@ -11435,8 +4290,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11449,7 +4304,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.79, + "tps_mean": 10.8, "tps_std": 0.01, "error": false, "error_type": null, @@ -11463,8 +4318,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11477,8 +4332,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 258.85, - "tps_std": 0.77, + "tps_mean": 258.29, + "tps_std": 0.54, "error": false, "error_type": null, "backend": "Vulkan", @@ -11491,8 +4346,8 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11519,22 +4374,22 @@ "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1071.38, - "tps_std": 11.2, + "tps_mean": 1098.31, + "tps_std": 7.28, "error": false, "error_type": null, "backend": "ROCm", @@ -11544,25 +4399,25 @@ "file_size_gib": 20.7, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 47.78, - "tps_std": 0.01, + "tps_mean": 47.89, + "tps_std": 0.32, "error": false, "error_type": null, "backend": "ROCm", @@ -11572,25 +4427,25 @@ "file_size_gib": 20.7, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 669.09, - "tps_std": 1.13, + "tps_mean": 681.43, + "tps_std": 0.74, "error": false, "error_type": null, "backend": "ROCm", @@ -11600,19 +4455,19 @@ "file_size_gib": 20.7, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -11628,123 +4483,11 @@ "file_size_gib": 20.7, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1093.96, - "tps_std": 6.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 47.95, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 661.4, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.13, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11757,8 +4500,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1079.44, - "tps_std": 6.76, + "tps_mean": 1080.97, + "tps_std": 5.37, "error": false, "error_type": null, "backend": "ROCm", @@ -11771,8 +4514,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11785,8 +4528,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 46.46, - "tps_std": 0.01, + "tps_mean": 45.98, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -11799,8 +4542,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11813,8 +4556,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 762.29, - "tps_std": 2.68, + "tps_mean": 611.53, + "tps_std": 0.52, "error": false, "error_type": null, "backend": "ROCm", @@ -11827,8 +4570,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11841,7 +4584,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.46, + "tps_mean": 40.17, "tps_std": 0.02, "error": false, "error_type": null, @@ -11855,120 +4598,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1082.35, - "tps_std": 6.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.48, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 777.89, - "tps_std": 1.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.24, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -11981,8 +4612,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1092.86, - "tps_std": 9.42, + "tps_mean": 1111.77, + "tps_std": 11.14, "error": false, "error_type": null, "backend": "ROCm", @@ -11995,8 +4626,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12009,8 +4640,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 48.16, - "tps_std": 0.3, + "tps_mean": 47.94, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12023,8 +4654,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12037,8 +4668,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 655.39, - "tps_std": 2.0, + "tps_mean": 669.42, + "tps_std": 1.32, "error": false, "error_type": null, "backend": "ROCm", @@ -12051,8 +4682,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12065,7 +4696,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 41.9, + "tps_mean": 41.96, "tps_std": 0.03, "error": false, "error_type": null, @@ -12079,120 +4710,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1113.86, - "tps_std": 6.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 48.1, - "tps_std": 0.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 655.89, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 41.98, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12205,8 +4724,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 661.63, - "tps_std": 3.14, + "tps_mean": 663.48, + "tps_std": 2.94, "error": false, "error_type": null, "backend": "Vulkan", @@ -12219,8 +4738,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12233,7 +4752,63 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.16, + "tps_mean": 58.17, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 289.32, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 44.23, "tps_std": 0.08, "error": false, "error_type": null, @@ -12244,67 +4819,11 @@ "file_size_gib": 20.7, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 288.86, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 44.24, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12317,8 +4836,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1013.4, - "tps_std": 39.22, + "tps_mean": 1016.96, + "tps_std": 39.65, "error": false, "error_type": null, "backend": "Vulkan", @@ -12331,8 +4850,8 @@ "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -12345,783 +4864,447 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 59.13, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 673.55, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 48.93, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 854.2, - "tps_std": 6.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 392.45, - "tps_std": 16.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 877.69, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 387.73, - "tps_std": 18.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 869.53, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 260.25, - "tps_std": 4.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 870.35, - "tps_std": 1.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 263.85, - "tps_std": 5.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 892.78, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 441.92, - "tps_std": 38.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 895.09, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 449.4, - "tps_std": 28.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 122.72, + "tps_mean": 59.11, "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 673.76, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 49.03, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 703.33, + "tps_std": 5.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 489.36, + "tps_std": 3.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.31, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 698.12, + "tps_std": 4.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 615.35, + "tps_std": 3.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.34, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 697.35, + "tps_std": 3.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 553.41, + "tps_std": 1.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 415.49, + "tps_std": 2.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -13129,27 +5312,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.31, + "tps_mean": 30.74, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -13157,27 +5340,587 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 107.34, + "tps_mean": 228.9, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.62, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 710.93, + "tps_std": 18.62, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 531.15, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.32, "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 864.78, + "tps_std": 7.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 625.12, + "tps_std": 11.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.99, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 839.13, + "tps_std": 7.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 20.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 646.67, + "tps_std": 11.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.86, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 844.42, + "tps_std": 19.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 546.66, + "tps_std": 5.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.11, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 117.96, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.18, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 58.94, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -13185,83 +5928,83 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 513.78, - "tps_std": 16.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.07, + "tps_mean": 15.26, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 376.52, + "tps_std": 7.37, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.74, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -13269,27 +6012,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 258.63, - "tps_std": 1.57, + "tps_mean": 351.87, + "tps_std": 1.49, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -13297,251 +6040,139 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.53, - "tps_std": 0.0, + "tps_mean": 13.71, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 324.28, - "tps_std": 1.23, + "tps_mean": 1298.78, + "tps_std": 7.07, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.97, - "tps_std": 0.0, + "tps_mean": 46.65, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 166.64, - "tps_std": 2.06, + "tps_mean": 691.44, + "tps_std": 4.5, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.7, - "tps_std": 0.0, + "tps_mean": 36.86, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 455.07, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 188.26, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -13549,27 +6180,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 426.08, - "tps_std": 0.81, + "tps_mean": 1288.38, + "tps_std": 10.85, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -13577,251 +6208,139 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 209.69, - "tps_std": 3.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.68, + "tps_mean": 45.58, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 440.33, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 202.85, - "tps_std": 0.64, + "tps_mean": 721.84, + "tps_std": 10.97, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.69, + "tps_mean": 36.17, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1219.74, + "tps_std": 15.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.57, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 466.09, - "tps_std": 1.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -13829,27 +6348,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 182.83, - "tps_std": 1.03, + "tps_mean": 600.42, + "tps_std": 7.55, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -13857,189 +6376,139 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.68, + "tps_mean": 36.88, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 720.78, + "tps_std": 2.94, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.21, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.79, "tps_std": 0.03, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 452.21, - "tps_std": 1.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 181.81, - "tps_std": 3.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.7, - "tps_std": 0.0, + "tps_mean": 35.41, + "tps_std": 0.01, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -14047,27 +6516,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 91.65, - "tps_std": 0.4, + "tps_mean": 1213.78, + "tps_std": 7.36, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -14075,27 +6544,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.98, - "tps_std": 0.0, + "tps_mean": 53.04, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -14103,27 +6572,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 81.52, - "tps_std": 0.33, + "tps_mean": 638.76, + "tps_std": 1.76, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -14131,475 +6600,1838 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.73, - "tps_std": 0.0, + "tps_mean": 40.93, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2700.94, + "tps_mean": 1226.22, + "tps_std": 161.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.27, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 667.32, + "tps_std": 5.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.43, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1303.32, + "tps_std": 8.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 38.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 727.92, + "tps_std": 8.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.52, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1294.4, + "tps_std": 31.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 596.78, + "tps_std": 7.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 589.2, "tps_std": 1.79, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 77.69, - "tps_std": 0.66, + "tps_mean": 43.18, + "tps_std": 0.07, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1564.27, - "tps_std": 18.55, + "tps_mean": 97.7, + "tps_std": 0.1, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 56.8, - "tps_std": 0.48, + "tps_mean": 31.82, + "tps_std": 0.05, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2691.0, - "tps_std": 4.6, + "tps_mean": 944.96, + "tps_std": 19.24, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 77.86, - "tps_std": 0.64, + "tps_mean": 43.58, + "tps_std": 1.06, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1563.05, - "tps_std": 19.77, + "tps_mean": 579.88, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.52, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 381.31, + "tps_std": 4.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 151.84, + "tps_std": 1.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 56.95, + "tps_mean": 3.17, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 360.84, + "tps_std": 1.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 154.86, + "tps_std": 2.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 373.84, + "tps_std": 0.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 139.53, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.16, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 79.53, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 60.29, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 313.26, + "tps_std": 1.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 141.61, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 309.22, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 143.91, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 315.72, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 130.8, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 69.12, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 244.46, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 11.02, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 124.32, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 309.13, + "tps_std": 0.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 140.72, + "tps_std": 1.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 303.4, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 141.37, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.18, "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2719.77, - "tps_std": 6.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1687.79, - "tps_std": 33.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.09, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2713.06, - "tps_std": 9.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.85, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1681.29, - "tps_std": 17.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.11, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -14607,27 +8439,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2666.49, - "tps_std": 3.29, + "tps_mean": 319.75, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -14635,27 +8467,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 78.31, - "tps_std": 0.89, + "tps_mean": 6.12, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -14663,27 +8495,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1530.18, - "tps_std": 18.45, + "tps_mean": 128.0, + "tps_std": 1.3, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -14691,372 +8523,285 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.72, + "tps_mean": 5.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 72.26, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 209.2, + "tps_std": 6.85, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 112.72, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 633.0, + "tps_std": 7.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.15, "tps_std": 0.36, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2674.53, - "tps_std": 5.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 78.93, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1534.58, - "tps_std": 20.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.65, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 647.4, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 79.07, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 455.63, - "tps_std": 1.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.86, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2479.97, - "tps_std": 41.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 87.24, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1759.67, - "tps_std": 6.72, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 64.91, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 631.59, - "tps_std": 4.27, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 296.45, + "tps_std": 1.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15066,25 +8811,25 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.91, - "tps_std": 0.01, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.19, + "tps_std": 8.13, "error": false, "error_type": null, "backend": "ROCm", @@ -15094,24 +8839,80 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 648.77, + "tps_std": 4.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 49.76, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 303.63, + "tps_mean": 439.58, "tps_std": 0.57, "error": false, "error_type": null, @@ -15122,24 +8923,24 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 36.07, + "tps_mean": 39.77, "tps_std": 0.03, "error": false, "error_type": null, @@ -15150,347 +8951,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 610.41, - "tps_std": 53.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 302.82, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.12, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 648.44, - "tps_std": 6.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 49.85, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 442.64, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.69, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 639.43, - "tps_std": 31.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 438.75, - "tps_std": 1.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.75, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15503,8 +8968,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 596.69, - "tps_std": 97.42, + "tps_mean": 641.05, + "tps_std": 2.79, "error": false, "error_type": null, "backend": "ROCm", @@ -15517,8 +8982,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15531,8 +8996,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.38, - "tps_std": 0.0, + "tps_mean": 50.73, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15545,8 +9010,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15559,8 +9024,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 308.13, - "tps_std": 1.66, + "tps_mean": 305.81, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "ROCm", @@ -15573,8 +9038,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15587,8 +9052,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 35.43, - "tps_std": 8.0, + "tps_mean": 36.03, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -15601,120 +9066,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 641.07, - "tps_std": 11.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 314.5, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.04, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15727,8 +9080,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 576.81, - "tps_std": 2.43, + "tps_mean": 575.01, + "tps_std": 3.11, "error": false, "error_type": null, "backend": "Vulkan", @@ -15741,8 +9094,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15755,8 +9108,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.18, - "tps_std": 0.04, + "tps_mean": 51.2, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -15769,8 +9122,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15783,8 +9136,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 213.74, - "tps_std": 0.68, + "tps_mean": 214.22, + "tps_std": 0.5, "error": false, "error_type": null, "backend": "Vulkan", @@ -15797,8 +9150,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -15811,853 +9164,489 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.52, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 633.21, - "tps_std": 13.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 56.15, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 287.49, - "tps_std": 1.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 42.67, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1638.53, - "tps_std": 13.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.67, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 488.89, - "tps_std": 0.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.91, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1631.29, - "tps_std": 15.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.62, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 492.09, - "tps_std": 1.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.93, + "tps_mean": 34.54, "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1691.01, - "tps_std": 16.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 730.11, - "tps_std": 1.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.02, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1690.3, - "tps_std": 13.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.0, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 730.49, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.89, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1668.5, - "tps_std": 13.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.68, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 507.77, - "tps_std": 2.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.46, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1642.7, - "tps_std": 14.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.75, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 507.84, - "tps_std": 1.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.32, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1303.99, - "tps_std": 6.84, + "tps_mean": 635.12, + "tps_std": 4.23, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.68, + "tps_mean": 56.31, "tps_std": 0.08, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 290.19, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 42.28, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1631.41, + "tps_std": 13.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.63, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 489.21, + "tps_std": 2.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.87, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1694.78, + "tps_std": 17.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.19, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 730.43, + "tps_std": 1.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.22, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1635.73, + "tps_std": 10.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.47, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 493.84, + "tps_std": 1.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.05, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1306.44, + "tps_std": 9.57, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -16665,8 +9654,36 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.63, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" } }, { @@ -16679,35 +9696,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 364.73, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 49.99, + "tps_mean": 365.33, "tps_std": 0.21, "error": false, "error_type": null, @@ -16721,8 +9710,36 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 49.83, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" } }, { @@ -16735,8 +9752,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1577.96, - "tps_std": 12.64, + "tps_mean": 1574.45, + "tps_std": 16.63, "error": false, "error_type": null, "backend": "Vulkan", @@ -16749,8 +9766,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -16763,8 +9780,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 78.94, - "tps_std": 0.01, + "tps_mean": 79.03, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "Vulkan", @@ -16777,8 +9794,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -16791,8 +9808,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 541.12, - "tps_std": 0.2, + "tps_mean": 545.65, + "tps_std": 0.37, "error": false, "error_type": null, "backend": "Vulkan", @@ -16805,8 +9822,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -16819,8 +9836,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 60.77, - "tps_std": 0.08, + "tps_mean": 60.7, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -16833,22 +9850,22 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1543.27, - "tps_std": 2.63, + "tps_mean": 1541.76, + "tps_std": 1.17, "error": false, "error_type": null, "backend": "ROCm", @@ -16858,25 +9875,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.61, - "tps_std": 0.14, + "tps_mean": 50.64, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -16886,25 +9903,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 165.68, - "tps_std": 0.88, + "tps_mean": 162.73, + "tps_std": 0.61, "error": false, "error_type": null, "backend": "ROCm", @@ -16914,19 +9931,19 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", + "env": "rocm-7_2_1", "env_base": "rocm", - "env_variant": "7_2", + "env_variant": "7_2_1", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -16942,123 +9959,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1539.48, - "tps_std": 5.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.57, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.31, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17071,8 +9976,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1528.37, - "tps_std": 6.4, + "tps_mean": 1524.35, + "tps_std": 2.98, "error": false, "error_type": null, "backend": "ROCm", @@ -17085,8 +9990,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17099,7 +10004,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.14, + "tps_mean": 50.78, "tps_std": 0.02, "error": false, "error_type": null, @@ -17113,8 +10018,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17127,8 +10032,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 196.35, - "tps_std": 2.62, + "tps_mean": 196.44, + "tps_std": 1.93, "error": false, "error_type": null, "backend": "ROCm", @@ -17141,8 +10046,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17155,7 +10060,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.97, + "tps_mean": 6.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -17169,120 +10074,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1521.12, - "tps_std": 5.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.22, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 196.03, - "tps_std": 1.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17295,8 +10088,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1507.68, - "tps_std": 1.23, + "tps_mean": 1513.21, + "tps_std": 2.79, "error": false, "error_type": null, "backend": "ROCm", @@ -17309,8 +10102,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17324,7 +10117,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 51.41, - "tps_std": 0.14, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -17337,8 +10130,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17351,8 +10144,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 206.97, - "tps_std": 1.96, + "tps_mean": 189.28, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "ROCm", @@ -17365,8 +10158,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17393,192 +10186,108 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1508.58, - "tps_std": 2.01, + "tps_mean": 326.38, + "tps_std": 0.44, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.44, - "tps_std": 0.15, + "tps_mean": 55.44, + "tps_std": 0.11, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 206.54, - "tps_std": 0.91, + "tps_mean": 145.14, + "tps_std": 0.2, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.62, + "tps_mean": 10.21, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 324.82, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 55.43, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 144.29, - "tps_std": 1.09, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -17589,36 +10298,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.2, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17631,8 +10312,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1313.97, - "tps_std": 1.29, + "tps_mean": 1311.79, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "Vulkan", @@ -17645,8 +10326,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17660,7 +10341,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 55.59, - "tps_std": 0.05, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -17673,8 +10354,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17687,8 +10368,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 208.18, - "tps_std": 2.24, + "tps_mean": 210.35, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "Vulkan", @@ -17701,8 +10382,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { @@ -17715,7 +10396,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.52, + "tps_mean": 8.53, "tps_std": 0.01, "error": false, "error_type": null, @@ -17729,16 +10410,16 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "2405d59cb", - "number": "8577" + "hash": "3f8752b55", + "number": "8743" } }, { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -17764,9 +10445,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -17792,9 +10473,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "default", "context_tokens": null, @@ -17820,9 +10501,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "default", "context_tokens": null, @@ -17960,9 +10641,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -17988,9 +10669,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18016,9 +10697,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, @@ -18044,9 +10725,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, @@ -18184,9 +10865,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18212,9 +10893,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18240,9 +10921,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, @@ -18268,9 +10949,9 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, @@ -18508,9 +11189,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18536,9 +11217,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18564,9 +11245,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "default", "context_tokens": null, @@ -18592,9 +11273,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "default", "context_tokens": null, @@ -18732,9 +11413,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18760,9 +11441,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18788,9 +11469,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, @@ -18816,9 +11497,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, @@ -18956,9 +11637,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -18984,9 +11665,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -19012,9 +11693,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null, @@ -19040,9 +11721,9 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", + "env": "rocm7-nightlies", "env_base": "rocm7", - "env_variant": "nightlies-hblt0", + "env_variant": "nightlies", "fa": true, "context": "default", "context_tokens": null,