From 2d09b9e6dbf24edbecc7ec21a1cd88b4317b58a3 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Thu, 5 Feb 2026 19:03:13 +0000 Subject: [PATCH] updated benchmarks --- ...-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 18 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log | 0 ...-of-00002__rocm-7.2__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 0 ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 0 ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 0 ...of-00002__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm7-nightlies__fa1.log | 0 ...02__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 0 ...0002__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 0 ...-00002__vulkan_radv__fa1__longctx32768.log | 0 ...-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log | 0 ...-of-00003__rocm-7.2__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm-7.2__hblt0__fa1.log | 0 ...03__rocm-7.2__hblt0__fa1__longctx32768.log | 0 ...Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 0 ...of-00003__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 0 ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00003__rocm7-nightlies__fa1.log | 0 ...03__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log | 0 ...of-00003__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm7.1.1__hblt0__fa1.log | 0 ...3__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 0 ...0003__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 0 ...-00003__vulkan_radv__fa1__longctx32768.log | 0 ...-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...-14B-Instruct-2512-BF16__rocm-7.2__fa1.log | 8 + ...2512-BF16__rocm-7.2__fa1__longctx32768.log | 8 + ...struct-2512-BF16__rocm-7.2__hblt0__fa1.log | 8 + ...16__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 8 + ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 8 + ...truct-2512-BF16__rocm6_4_4__hblt0__fa1.log | 8 + ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...struct-2512-BF16__rocm7-nightlies__fa1.log | 8 + ...16__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2512-BF16__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...14B-Instruct-2512-BF16__rocm7.1.1__fa1.log | 0 ...512-BF16__rocm7.1.1__fa1__longctx32768.log | 0 ...truct-2512-BF16__rocm7.1.1__hblt0__fa1.log | 0 ...6__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 8 + ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 8 + ...2-BF16__vulkan_radv__fa1__longctx32768.log | 8 + ...Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log | 8 + ...D-Q8_K_XL__rocm-7.2__fa1__longctx32768.log | 8 + ...B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log | 8 + ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...ano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...ano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log | 0 ...-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log | 0 ...-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log | 0 ...L__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...o-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log | 8 + ...-of-00003__rocm-7.2__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm-7.2__hblt0__fa1.log | 8 + ...03__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log | 0 ...of-00003__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm7.1.1__hblt0__fa1.log | 0 ...3__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...A3B-BF16-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...3B-BF16-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...nstruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log | 8 + ...D-Q6_K_XL__rocm-7.2__fa1__longctx32768.log | 8 + ...-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log | 8 + ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...struct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log | 0 ...-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log | 0 ...2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log | 0 ...L__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 8 + ...6_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log | 8 + ...ct-Q4_K_M__rocm-7.2__fa1__longctx32768.log | 8 + ...-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log | 8 + ..._M__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 8 + ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 8 + ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 8 + ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 8 + ..._M__rocm7-nightlies__fa1__longctx32768.log | 8 + ...ct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...0B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log | 0 ...t-Q4_K_M__rocm7.1.1__fa1__longctx32768.log | 0 ...Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log | 0 ...M__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 8 + ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 8 + ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...mma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log | 8 + ...D-Q8_K_XL__rocm-7.2__fa1__longctx32768.log | 8 + ...2b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log | 8 + ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...2b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...ma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log | 0 ...-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log | 0 ...b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log | 0 ...L__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-it-BF16-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...it-BF16-00001-of-00002__rocm7.1.1__fa1.log | 0 ...of-00002__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm7.1.1__hblt0__fa1.log | 0 ...2__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...-it-BF16-00001-of-00002__rocm7_rc__fa1.log | 0 ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + .../gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log | 8 + ...it-Q3_K_S__rocm-7.2__fa1__longctx32768.log | 8 + ...a-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log | 8 + ..._S__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 8 + ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 8 + ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 8 + ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...a-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log | 8 + ..._S__rocm7-nightlies__fa1__longctx32768.log | 8 + ...it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + .../gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log | 0 ...t-Q3_K_S__rocm7.1.1__fa1__longctx32768.log | 0 ...-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log | 0 ...S__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 8 + ..._K_S__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 8 + ...Q3_K_S__vulkan_radv__fa1__longctx32768.log | 8 + ...0b-mxfp4-00001-of-00003__rocm-7.2__fa1.log | 8 + ...-of-00003__rocm-7.2__fa1__longctx32768.log | 8 + ...4-00001-of-00003__rocm-7.2__hblt0__fa1.log | 8 + ...03__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...4-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log | 0 ...of-00003__rocm7.1.1__fa1__longctx32768.log | 0 ...-00001-of-00003__rocm7.1.1__hblt0__fa1.log | 0 ...3__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm-7.2__fa1.log | 8 + ...20b-mxfp4__rocm-7.2__fa1__longctx32768.log | 8 + ...pt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log | 8 + ...p4__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 8 + ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 8 + ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 8 + ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 8 + ...p4__rocm7-nightlies__fa1__longctx32768.log | 8 + ...20b-mxfp4__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log | 0 ...0b-mxfp4__rocm7.1.1__fa1__longctx32768.log | 0 ...t-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log | 0 ...4__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 8 + ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 8 + ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm-7.2__fa1.log | 8 + ...2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log | 8 + ..._0__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 8 + ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 8 + ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 8 + ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 8 + ..._0__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm7.1.1__fa1.log | 0 ...-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log | 0 ...llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log | 0 ...0__rocm7.1.1__hblt0__fa1__longctx32768.log | 0 .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 8 + ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 8 + ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 8 + benchmark/results/09-01-2026/system_info.json | 6 + ...-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log | 6 +- ...-of-00002__rocm-7.2__fa1__longctx32768.log | 6 +- ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 6 +- ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 10 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 22 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...ash-BF16-00001-of-00002__rocm-7.2__fa1.log | 8 + ...-of-00002__rocm-7.2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7.2__hblt0__fa1.log | 8 + ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...sh-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...LM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log | 8 + ...D-Q8_K_XL__rocm-7.2__fa1__longctx32768.log | 8 + ...Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log | 8 + ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 8 + ...M-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...lash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log | 4 +- ...-of-00002__rocm-7.2__fa1__longctx32768.log | 6 +- ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 4 +- ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 10 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...-14B-Instruct-2512-BF16__rocm-7.2__fa1.log | 4 +- ...2512-BF16__rocm-7.2__fa1__longctx32768.log | 4 +- ...struct-2512-BF16__rocm-7.2__hblt0__fa1.log | 4 +- ...16__rocm-7.2__hblt0__fa1__longctx32768.log | 4 +- ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 10 +- ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 10 +- ...truct-2512-BF16__rocm6_4_4__hblt0__fa1.log | 10 +- ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...struct-2512-BF16__rocm7-nightlies__fa1.log | 10 +- ...16__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...2512-BF16__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 10 +- ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 10 +- ...2-BF16__vulkan_radv__fa1__longctx32768.log | 10 +- ...Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log | 6 +- ...D-Q8_K_XL__rocm-7.2__fa1__longctx32768.log | 6 +- ...B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log | 6 +- ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...ano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log | 10 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 10 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 10 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...o-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log | 10 +- ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log | 6 +- ...-of-00003__rocm-7.2__fa1__longctx32768.log | 6 +- ...L-00001-of-00003__rocm-7.2__hblt0__fa1.log | 6 +- ...03__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...L-00001-of-00003__rocm7-nightlies__fa1.log | 10 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 10 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 10 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 10 +- ...A3B-BF16-00001-of-00002__rocm-7.2__fa1.log | 6 +- ...-of-00002__rocm-7.2__fa1__longctx32768.log | 6 +- ...6-00001-of-00002__rocm-7.2__hblt0__fa1.log | 6 +- ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 10 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...nstruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log | 6 +- ...D-Q6_K_XL__rocm-7.2__fa1__longctx32768.log | 6 +- ...-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log | 6 +- ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 10 +- ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 10 +- ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 10 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log | 10 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...D-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 10 +- ...6_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log | 6 +- ...ct-Q4_K_M__rocm-7.2__fa1__longctx32768.log | 6 +- ...-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log | 6 +- ..._M__rocm-7.2__hblt0__fa1__longctx32768.log | 4 +- ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 10 +- ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 10 +- ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 10 +- ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 10 +- ..._M__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...ct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 10 +- ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 10 +- ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 10 +- ...-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log | 6 +- ...-of-00002__rocm-7.2__fa1__longctx32768.log | 6 +- ...L-00001-of-00002__rocm-7.2__hblt0__fa1.log | 6 +- ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 10 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...mma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log | 4 +- ...D-Q8_K_XL__rocm-7.2__fa1__longctx32768.log | 6 +- ...2b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log | 6 +- ...XL__rocm-7.2__hblt0__fa1__longctx32768.log | 4 +- ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 10 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 10 +- ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 10 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...2b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 10 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 10 +- ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...-it-BF16-00001-of-00002__rocm-7.2__fa1.log | 6 +- ...-of-00002__rocm-7.2__fa1__longctx32768.log | 6 +- ...6-00001-of-00002__rocm-7.2__hblt0__fa1.log | 6 +- ...02__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 10 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 10 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 10 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- .../gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log | 6 +- ...it-Q3_K_S__rocm-7.2__fa1__longctx32768.log | 6 +- ...a-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log | 6 +- ..._S__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 10 +- ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 10 +- ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...a-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log | 10 +- ..._S__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 10 +- ..._K_S__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 10 +- ...Q3_K_S__vulkan_radv__fa1__longctx32768.log | 10 +- ...0b-mxfp4-00001-of-00003__rocm-7.2__fa1.log | 6 +- ...-of-00003__rocm-7.2__fa1__longctx32768.log | 6 +- ...4-00001-of-00003__rocm-7.2__hblt0__fa1.log | 6 +- ...03__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 10 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 10 +- ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 10 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...4-00001-of-00003__rocm7-nightlies__fa1.log | 4 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 4 +- ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 4 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 4 +- ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 10 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 10 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 10 +- .../gpt-oss-20b-mxfp4__rocm-7.2__fa1.log | 6 +- ...20b-mxfp4__rocm-7.2__fa1__longctx32768.log | 6 +- ...pt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log | 6 +- ...p4__rocm-7.2__hblt0__fa1__longctx32768.log | 6 +- .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 10 +- ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 10 +- ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 10 +- ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 4 +- ...p4__rocm7-nightlies__fa1__longctx32768.log | 4 +- ...20b-mxfp4__rocm7-nightlies__hblt0__fa1.log | 2 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 4 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 10 +- ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 10 +- .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 10 +- ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__rocm-7.2__fa1.log | 6 +- ...2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log | 6 +- .../llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log | 6 +- ..._0__rocm-7.2__hblt0__fa1__longctx32768.log | 4 +- .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 10 +- ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 10 +- ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 10 +- ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 10 +- ..._0__rocm7-nightlies__fa1__longctx32768.log | 10 +- ...2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log | 10 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 10 +- ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 10 +- ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 10 +- benchmark/results/system_info.json | 7 +- docs/results.json | 21278 +++++++--------- 616 files changed, 11988 insertions(+), 13591 deletions(-) create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log (100%) create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log rename benchmark/results/{ => 09-01-2026}/llama-2-7b.Q4_0__rocm7.1.1__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log (100%) rename benchmark/results/{ => 09-01-2026}/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log (100%) rename benchmark/results/{ => 09-01-2026}/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log create mode 100644 benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/09-01-2026/system_info.json create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..6fe8641 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 66.52 ± 7.27 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.76 ± 0.08 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..f2148c0 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..648542a --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.47 ± 0.20 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.82 ± 0.07 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2892f38 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.47 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..665e557 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 81.03 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.07 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..19a7c47 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.37 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..5b9f63c --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.43 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.05 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..623b913 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.42 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..a56cbfc --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.64 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.22 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..d33cd4c --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,18 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f1a3468b5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a3468b96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a3468baef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4e882) [0x7f1a37496882] +/usr/local/lib64/libggml-hip.so.0(+0x2d53c4e) [0x7f1a3749bc4e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1a346a2e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1a37b75630] +/usr/local/bin/llama-bench() [0x40ae7c] +/usr/local/bin/llama-bench() [0x408bd1] +/lib64/libc.so.6(+0x35b5) [0x7f1a340215b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a34021668] +/usr/local/bin/llama-bench() [0x409cf5] +✖ ! [rocm-7alpha] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..d353fd9 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.94 ± 2.79 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.16 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e87a569 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.14 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3690a51 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 41.19 ± 7.76 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 1.87 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..fd57886 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.37 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..67ff2d8 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.53 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.96 ± 0.05 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4b28cb9 --- /dev/null +++ b/benchmark/results/09-01-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 12.50 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.27 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/09-01-2026/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..eb98cfd --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.50 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..6c2824c --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 27.87 ± 0.75 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.42 ± 0.06 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..1533579 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.27 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c15e681 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 28.46 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.44 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..ce0f9af --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.26 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..7b5211d --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.54 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..44dfe3c --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.21 ± 0.14 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..035ac3b --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.82 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..c7be012 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.05 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..e74c380 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.90 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..05a9c21 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.64 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4a65754 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.86 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..00c2046 --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.89 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.81 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a1e97ab --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.12 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.16 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..1f40e8f --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 87.66 ± 0.55 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4832afb --- /dev/null +++ b/benchmark/results/09-01-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.96 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.39 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log new file mode 100644 index 0000000..921c345 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 797.13 ± 2.39 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9dfec45 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 146.47 ± 5.52 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..0832956 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.39 ± 2.22 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..91e85e1 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 152.56 ± 6.51 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log new file mode 100644 index 0000000..c87eda5 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.73 ± 2.77 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..d252ff5 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..0a3cf91 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 805.52 ± 3.18 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1f87ce0 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.32 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..b03e29d --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 798.60 ± 3.84 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..9a0fea9 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.77 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..09d46a4 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.84 ± 4.89 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6ff28f3 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.82 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..e5b1efe --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 187.83 ± 22.96 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.19 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..d2ca7d7 --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.52 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log new file mode 100644 index 0000000..784b23b --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 195.84 ± 0.06 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.56 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..5e0335b --- /dev/null +++ b/benchmark/results/09-01-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 75.42 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.23 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a25472a --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.57 ± 2.37 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.69 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..7ccfdf7 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 559.16 ± 0.99 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.74 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..eaea7c6 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 389.10 ± 3.02 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.68 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d0ae54b --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 558.52 ± 1.33 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.73 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..4aa26ac --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1065.39 ± 1.75 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.12 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..504249f --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 823.17 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.90 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..50abf7d --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1034.18 ± 3.12 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.08 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fab837f --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 896.75 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.88 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..12cfa4e --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 567.35 ± 4.92 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ec34bc3 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 660.41 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..2433767 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 560.67 ± 3.15 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9e1315b --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 663.35 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.44 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..64ecbc8 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1253.52 ± 10.26 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 47.03 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..bae8a21 --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 408.37 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..aa1388b --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.39 ± 35.31 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.53 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4426e2b --- /dev/null +++ b/benchmark/results/09-01-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 403.09 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.91 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log new file mode 100644 index 0000000..8ce6864 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 178.32 ± 26.83 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.97 ± 0.98 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9777216 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.11 ± 0.36 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.90 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..ce3e316 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 167.63 ± 28.76 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.73 ± 0.74 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..929e46f --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 37.92 ± 0.33 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.78 ± 0.20 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..2d23b75 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 206.60 ± 0.55 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..a8041b1 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.83 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.71 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3e46d51 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.06 ± 14.56 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8658106 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.67 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..d0be143 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.07 ± 3.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..6ef3440 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.42 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..7607a8d --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 188.66 ± 20.66 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.61 ± 1.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8a32625 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.43 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..74cc3d7 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 168.14 ± 0.52 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 2.08 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..e62c799 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.62 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.39 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..382b3ea --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 146.89 ± 0.98 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.09 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..23324eb --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.66 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.52 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..3aa8493 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 484.09 ± 10.61 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.12 ± 0.16 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..4f16aaf --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 162.38 ± 4.20 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.71 ± 1.16 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..50fd7e6 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.54 ± 2.48 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.09 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3bcfa1d --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.72 ± 4.90 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.36 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..96acea1 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.12 ± 2.09 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c06be6a --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.34 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..7809d51 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.67 ± 2.54 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..b898478 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.89 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.18 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..050cd71 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.51 ± 1.28 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..80b86d3 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.91 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.28 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..acf2f5f --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 494.46 ± 2.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..736f285 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.11 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5c4f9fe --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 424.44 ± 1.61 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.62 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..3d61772 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 65.51 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..33f57b0 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 364.62 ± 2.62 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.49 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..9b6a21f --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 93.65 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a326a57 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 813.78 ± 5.52 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..a38fcee --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 154.84 ± 3.34 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.32 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..dba8fa9 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 789.10 ± 47.98 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.51 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..49dce2c --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 155.23 ± 3.28 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.27 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..6b3826e --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1073.10 ± 11.76 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..c8a6856 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.02 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3e19bb9 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1075.09 ± 15.15 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.72 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d3cb0bc --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.43 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..efb6752 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 904.96 ± 12.42 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.50 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1446b4a --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.93 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..2b1dc75 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 906.79 ± 8.48 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6e97883 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.87 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..01df500 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1180.84 ± 8.60 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.24 ± 0.04 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..00d42b3 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.45 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..7e7199a --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1046.73 ± 6.25 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.71 ± 0.14 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..edb34ce --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 109.86 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 30.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log new file mode 100644 index 0000000..06add3d --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.32 ± 7.42 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.51 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..3022aa0 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.97 ± 3.98 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.41 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..101bd0a --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1096.05 ± 129.46 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..54589c8 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 160.95 ± 3.41 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a7ccbd8 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1240.19 ± 1.93 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..4178bdc --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 286.57 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.33 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..096ba0e --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1246.06 ± 12.57 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ae4c172 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 211.86 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.37 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1f7c7dd --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.75 ± 5.62 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.54 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ac7f41b --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.98 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.13 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..243d803 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.38 ± 14.75 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.53 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..097c8cf --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.67 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..df1dc18 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1072.21 ± 149.58 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 1.52 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..8a11d0b --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.87 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.45 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log new file mode 100644 index 0000000..2539f39 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1075.31 ± 42.44 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 86.69 ± 0.11 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..6079ff7 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.06 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.81 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..fd2dcb3 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 211.96 ± 2.48 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.74 ± 0.49 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..e9297f0 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 239.82 ± 0.51 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 24.76 ± 1.87 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..fe3532d --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 262.57 ± 3.77 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.94 ± 0.05 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d6b3f2b --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 295.41 ± 0.37 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.85 ± 3.58 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..1e7106e --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.41 ± 3.59 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.12 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f0ae3e2 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 421.06 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.55 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..41593f2 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 610.91 ± 4.82 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.22 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..27251b7 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.47 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.56 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0b88bef --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 354.31 ± 5.52 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.40 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..2b3ed00 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 327.85 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..3ea4bf5 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 364.74 ± 5.05 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.38 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f3c68f2 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 340.53 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.05 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..c0635f7 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 634.07 ± 4.20 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.94 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9e31a3e --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 121.89 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 22.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..a95a432 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 538.47 ± 29.53 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.56 ± 0.11 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..c355291 --- /dev/null +++ b/benchmark/results/09-01-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 211.76 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a2749f2 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.22 ± 0.21 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9f14bc9 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 231.95 ± 3.74 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..5cc6d4b --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.04 ± 0.14 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d9d363a --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 230.54 ± 3.36 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..ca8d844 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 931.79 ± 1.30 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f18870e --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 247.33 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..7f55d20 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 936.67 ± 1.30 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3566962 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 259.06 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..c78e668 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 428.84 ± 1.18 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5337922 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.24 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..7e1d970 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 431.06 ± 0.61 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..989d957 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.40 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..1c332ff --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 602.68 ± 80.42 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..aa3f7c3 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.40 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.97 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..8bb191e --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 548.18 ± 1.59 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.94 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..7cec059 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 231.70 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.11 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..e6e1d62 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 430.73 ± 1.02 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 3.86 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..f7bcf1c --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.46 ± 0.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.69 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..02273a2 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 525.55 ± 1.67 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..76656be --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 202.49 ± 3.52 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..24ad07c --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.15 ± 0.56 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..a72de4d --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..302fb73 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 531.41 ± 1.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d754100 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.27 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1c6842e --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.21 ± 1.24 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c14e74b --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.14 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..efdae4b --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.32 ± 1.23 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a4539a4 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 193.22 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log similarity index 100% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log rename to benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ef8b29d --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 111.81 ± 20.34 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.85 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..e6b8559 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.77 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.40 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..e1a2ff4 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.40 ± 0.65 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..f1e6c00 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.67 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log new file mode 100644 index 0000000..83da529 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2858.32 ± 17.99 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..5def9e8 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1182.57 ± 31.53 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..455b875 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2679.11 ± 228.92 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 70.08 ± 4.20 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ebaaee2 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1138.90 ± 19.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log new file mode 100644 index 0000000..c102746 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2884.56 ± 5.24 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.80 ± 0.03 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..eeeb59c --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1446.85 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..2a53d30 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.72 ± 3.55 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.97 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a78c350 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1258.46 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..e5cb703 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2829.05 ± 14.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 82.17 ± 4.20 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..421c142 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1118.35 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..e8c43cb --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.10 ± 21.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.76 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..143a699 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1123.24 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..6dcd777 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1514.96 ± 340.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.61 ± 2.29 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a3d57b9 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 188.74 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 66.83 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log new file mode 100644 index 0000000..ac9a8fa --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1235.50 ± 244.41 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.02 ± 2.09 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..56ae086 --- /dev/null +++ b/benchmark/results/09-01-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 694.43 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.80 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log new file mode 100644 index 0000000..2299f09 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.34 ± 1.51 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.25 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..6275774 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.76 ± 0.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.60 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..c90c1af --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.33 ± 1.42 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.27 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c7a144b --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.89 ± 4.16 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.64 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..3553f04 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 683.09 ± 7.89 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.50 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..64eaca1 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 334.72 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.07 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..78c2f6b --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 684.21 ± 8.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.99 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6b62e44 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 333.73 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.14 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..a0cb708 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 674.34 ± 3.87 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.10 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5cd16ba --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.35 ± 0.56 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.44 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..8898523 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 679.62 ± 5.41 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.11 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..75ba15a --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.62 ± 1.18 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.46 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5b42185 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 862.37 ± 1.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..2ba5783 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 183.43 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.21 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..9676853 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 585.93 ± 27.57 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 57.18 ± 0.07 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..de517e6 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 160.38 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log new file mode 100644 index 0000000..4b54d27 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 548.07 ± 6.87 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.66 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..ae41eeb --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.62 ± 3.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..9aea9e3 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.89 ± 7.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9029d6a --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.07 ± 4.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log new file mode 100644 index 0000000..872dcfb --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1776.02 ± 15.96 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.43 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..409ef11 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 596.09 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.20 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..a92dd8c --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1777.68 ± 17.27 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.45 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3d98873 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.37 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..e0b5cee --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1734.91 ± 19.75 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.48 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..e5b1d94 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 557.68 ± 0.88 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.80 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..5330505 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1709.32 ± 28.51 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..52dd5fe --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 541.93 ± 1.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.84 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ca81fc1 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1771.72 ± 240.97 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.95 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a764fdc --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 294.23 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.42 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log new file mode 100644 index 0000000..65dad4f --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1429.10 ± 24.10 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 80.56 ± 0.18 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..b18d7b4 --- /dev/null +++ b/benchmark/results/09-01-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 284.79 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 56.04 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1.log new file mode 100644 index 0000000..4834219 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.23 ± 0.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.62 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..fa012c7 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 139.39 ± 0.75 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..58b1ece --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.22 ± 0.08 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.67 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1f2b6d2 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 137.30 ± 1.39 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log new file mode 100644 index 0000000..28fa54f --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1595.85 ± 4.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..eabd52b --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.69 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..97ed387 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1599.42 ± 4.42 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.06 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8b942a9 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 187.77 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0008484 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.91 ± 0.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.72 ± 0.02 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..724fd52 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.45 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..55d9338 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.59 ± 0.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.01 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d7a864a --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.15 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log rename to benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log rename to benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log rename to benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log rename to benchmark/results/09-01-2026/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..7655db3 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1261.90 ± 215.63 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 6.26 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..5a169c2 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.76 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 4.00 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log new file mode 100644 index 0000000..5548853 --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1126.16 ± 2.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.42 ± 0.08 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..697ab8d --- /dev/null +++ b/benchmark/results/09-01-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 162.11 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.74 ± 0.00 | + +build: 9c142e3a2 (7670) diff --git a/benchmark/results/09-01-2026/system_info.json b/benchmark/results/09-01-2026/system_info.json new file mode 100644 index 0000000..d29bc7f --- /dev/null +++ b/benchmark/results/09-01-2026/system_info.json @@ -0,0 +1,6 @@ +{ + "distro": "Fedora Linux 42 (Workstation Edition)", + "kernel": "6.18.3-100.fc42.x86_64", + "linux_firmware": "linux-firmware-20251111-1.fc42.noarch", + "timestamp": "09 Jan 2026" +} \ No newline at end of file diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log index 6fe8641..07bbca3 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 66.52 ± 7.27 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.76 ± 0.08 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log index f2148c0..d3dcb44 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log index 648542a..dc35095 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.47 ± 0.20 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.82 ± 0.07 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log index 2892f38..914ebcc 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.47 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 665e557..b4213c1 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 81.03 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.07 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 19a7c47..86f7ce7 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.37 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 5b9f63c..2c8897a 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 79.43 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.05 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 623b913..77126a6 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.42 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.15 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index a56cbfc..8e9d026 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 73.64 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.22 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index d33cd4c..b8f691a 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,18 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:96: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f1a3468b5a5] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a3468b96b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a3468baef] -/usr/local/lib64/libggml-hip.so.0(+0x2d4e882) [0x7f1a37496882] -/usr/local/lib64/libggml-hip.so.0(+0x2d53c4e) [0x7f1a3749bc4e] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1a346a2e5e] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1a37b75630] -/usr/local/bin/llama-bench() [0x40ae7c] -/usr/local/bin/llama-bench() [0x408bd1] -/lib64/libc.so.6(+0x35b5) [0x7f1a340215b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a34021668] -/usr/local/bin/llama-bench() [0x409cf5] -✖ ! [rocm-7alpha] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index d353fd9..baf867e 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.94 ± 2.79 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.16 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index e87a569..108f8a6 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.14 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 3690a51..3836dd7 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 41.19 ± 7.76 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 1.87 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index fd57886..b2e6658 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.37 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.26 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 4.94 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 67ff2d8..aeb25b4 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.53 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.96 ± 0.05 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 4b28cb9..31ef62e 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 12.50 ± 0.00 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.27 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..455d013 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..71489ab --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..e9729d2 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cd27b5f --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..d58dab9 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..aa07720 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..f69d3f6 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6b8851c --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..5c3d51b --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..718fbd4 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..45c08f7 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9bd643e --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..f8630c5 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a025866 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..f2c87cb --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..ad53f7f --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..01e9227 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..e85162c --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..da85676 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..40cfc38 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a68e922 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..11651a6 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..d44f270 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3627139 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..35840f2 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..7c3adf6 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..3d7dec0 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8bd5686 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5754f88 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..58b0878 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..8c3838a --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..f135b33 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log index eb98cfd..e023a01 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.50 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 48.83 ± 0.01 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log index 6c2824c..94fd676 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 27.87 ± 0.75 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.42 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.25 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.45 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log index 1533579..3a2ff58 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.27 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.38 ± 0.03 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log index c15e681..0c8dbc3 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 28.46 ± 0.36 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.44 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 29.17 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index ce0f9af..238a9e8 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 148.26 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.04 ± 0.21 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 7b5211d..14bad93 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.54 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.22 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.43 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 44dfe3c..b6f9056 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.21 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 146.83 ± 0.25 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 035ac3b..0611423 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.82 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 36.40 ± 0.23 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index c7be012..ac63a96 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 67.05 ± 0.01 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 155.06 ± 0.11 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index e74c380..1986253 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.90 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.36 ± 0.61 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index 05a9c21..b16ded7 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 66.64 ± 0.02 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 151.70 ± 0.21 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index 4a65754..5bd76eb 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.86 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.35 ± 0.67 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.46 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 00c2046..a342daf 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.89 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.81 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 21.74 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.81 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index a1e97ab..23f10da 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.12 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.16 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 8.35 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.36 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 1f40e8f..6907745 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 87.66 ± 0.55 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp512 | 99.39 ± 0.58 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg128 | 2.76 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 4832afb..36b4c25 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.96 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.39 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 11.79 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.44 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log index 921c345..df4a892 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 797.13 ± 2.39 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log index 9dfec45..f184a50 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 146.47 ± 5.52 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log index 0832956..32590b1 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.39 ± 2.22 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log index 91e85e1..505df24 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 152.56 ± 6.51 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log index c87eda5..8118e71 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 801.73 ± 2.77 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log index d252ff5..f3f55d3 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.31 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log index 0a3cf91..661b27c 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 805.52 ± 3.18 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log index 1f87ce0..6cd94b5 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.32 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log index b03e29d..1ef3f29 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 798.60 ± 3.84 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log index 9a0fea9..5b40f9e 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.77 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.10 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log index 09d46a4..545b269 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.84 ± 4.89 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log index 6ff28f3..5974e95 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 159.82 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.11 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index e5b1efe..3c69e17 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 187.83 ± 22.96 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.19 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log index d2ca7d7..f5ad8c4 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.52 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.69 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index 784b23b..e04be5a 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 195.84 ± 0.06 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.56 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log index 5e0335b..d5938e7 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 75.42 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.23 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log index a25472a..2325924 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.57 ± 2.37 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.69 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.61 ± 2.94 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.58 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log index 7ccfdf7..47210cf 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 559.16 ± 0.99 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.74 ± 0.02 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 562.85 ± 0.47 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.59 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log index eaea7c6..266236d 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 389.10 ± 3.02 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.68 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 388.54 ± 2.76 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.61 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log index d0ae54b..9c300e9 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 558.52 ± 1.33 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.73 ± 0.02 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 564.71 ± 0.81 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.60 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log index 4aa26ac..9da9fbc 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1065.39 ± 1.75 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.12 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1026.87 ± 6.06 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.90 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 504249f..6b04e21 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 823.17 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.90 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1042.36 ± 2.24 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.08 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index 50abf7d..6463c3e 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1034.18 ± 3.12 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.08 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1027.41 ± 6.28 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.05 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index fab837f..34cd5ba 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 896.75 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.88 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1038.86 ± 3.17 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.04 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 12cfa4e..34472dc 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 567.35 ± 4.92 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1070.15 ± 5.54 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.56 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index ec34bc3..9a56129 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 660.41 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.42 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 908.79 ± 27.38 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.91 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log index 2433767..eefc5b7 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 560.67 ± 3.15 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.63 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 1038.67 ± 2.82 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 42.57 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index 9e1315b..19b5211 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 663.35 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.44 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 821.93 ± 29.40 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.92 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 64ecbc8..21e6010 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1253.52 ± 10.26 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 47.03 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 676.59 ± 50.83 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 47.22 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index bae8a21..b7d95f1 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 408.37 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.93 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 342.52 ± 0.46 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 35.25 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log index aa1388b..7ba7684 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.39 ± 35.31 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.53 ± 0.03 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp512 | 951.76 ± 41.03 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg128 | 46.68 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 4426e2b..c01a4ec 100644 --- a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 403.09 ± 0.00 | -| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 40.91 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 799.39 ± 0.69 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | tg32 @ d32768 | 41.15 ± 0.06 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log index 8ce6864..682b041 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 178.32 ± 26.83 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.97 ± 0.98 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log index 9777216..9057c7d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.11 ± 0.36 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.90 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log index ce3e316..366d2a3 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 167.63 ± 28.76 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.73 ± 0.74 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.10 ± 8.37 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.04 ± 0.06 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log index 929e46f..cb8f1d5 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 37.92 ± 0.33 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.78 ± 0.20 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.53 ± 0.41 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.50 ± 0.75 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 2d23b75..a353c9d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 206.60 ± 0.55 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 205.05 ± 3.62 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 14.98 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index a8041b1..e89ce17 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.83 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.71 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.11 ± 0.62 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.63 ± 0.07 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 3e46d51..50fa18d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.06 ± 14.56 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.02 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 203.41 ± 3.52 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.00 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log index 8658106..e681c9d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.67 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.70 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.19 ± 0.64 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.58 ± 0.10 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index d0be143..803ea12 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.07 ± 3.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.04 ± 4.11 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.19 ± 0.09 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 6ef3440..7bd5d26 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.42 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.00 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.42 ± 0.37 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.94 ± 0.42 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log index 7607a8d..291f2c8 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 188.66 ± 20.66 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.61 ± 1.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 197.48 ± 10.80 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.20 ± 0.08 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log index 8a32625..a79a619 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.43 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.01 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.60 ± 0.36 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.81 ± 0.61 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 74cc3d7..5264d6e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 168.14 ± 0.52 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 2.08 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 119.82 ± 3.30 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 17.75 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index e62c799..8ecec15 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.62 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.39 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 27.41 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.42 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index 382b3ea..67b83cc 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 146.89 ± 0.98 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.09 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 133.28 ± 1.45 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 15.98 ± 0.25 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index 23324eb..7945690 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.66 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.52 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 30.79 ± 0.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.50 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log index 3aa8493..f8f2cd5 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 484.09 ± 10.61 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.12 ± 0.16 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 489.11 ± 2.88 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.18 ± 0.16 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log index 4f16aaf..c30a4c0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 162.38 ± 4.20 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.71 ± 1.16 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 214.97 ± 1.13 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.57 ± 1.40 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log index 50fd7e6..3f8e597 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.54 ± 2.48 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.09 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.32 ± 2.55 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.23 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log index 3bcfa1d..23b0efd 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.72 ± 4.90 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.36 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 207.64 ± 0.55 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.84 ± 0.97 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index 96acea1..4bf79a0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.12 ± 2.09 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 576.03 ± 3.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 26.12 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index c06be6a..d1e5d2a 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.34 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.13 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 249.94 ± 1.13 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.18 ± 0.33 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 7809d51..3d12444 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.67 ± 2.54 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 569.42 ± 8.52 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.07 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index b898478..fedbfed 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.89 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.18 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 250.24 ± 0.88 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.77 ± 0.98 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 050cd71..a6ca809 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.51 ± 1.28 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 512.10 ± 4.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.27 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 80b86d3..c1c2630 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.91 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.28 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.18 ± 0.74 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.71 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index acf2f5f..7e6c7c6 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 494.46 ± 2.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.13 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 493.72 ± 3.45 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.32 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index 736f285..aec4b8d 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.11 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.24 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 209.02 ± 0.16 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.67 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 5c4f9fe..4f9d7a2 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 424.44 ± 1.61 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.62 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 168.95 ± 7.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 10.62 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 3d61772..69c594e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 65.51 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.05 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 75.04 ± 0.02 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.68 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 33f57b0..e9a9b4f 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 364.62 ± 2.62 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.49 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 351.97 ± 2.56 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 9.42 ± 0.21 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 9b6a21f..e5c1a75 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 93.65 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.14 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 127.67 ± 0.45 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.31 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log index a326a57..0dd1297 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 813.78 ± 5.52 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 815.37 ± 5.82 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.54 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log index a38fcee..cef8f0a 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 154.84 ± 3.34 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.32 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.56 ± 4.38 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.30 ± 0.03 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log index dba8fa9..c248812 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 789.10 ± 47.98 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.51 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 811.39 ± 6.56 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log index 49dce2c..d87028a 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 155.23 ± 3.28 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.27 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 171.54 ± 4.45 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.29 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index 6b3826e..73135a6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1073.10 ± 11.76 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1078.99 ± 11.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 56.45 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log index c8a6856..0fa6c28 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.02 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.04 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.17 ± 8.71 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.94 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index 3e19bb9..e4c7e21 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1075.09 ± 15.15 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.72 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1080.52 ± 10.73 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 57.49 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index d3cb0bc..bb64b06 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.43 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.02 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.42 ± 7.66 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.96 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log index efb6752..75d0678 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 904.96 ± 12.42 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.50 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1056.78 ± 36.08 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.15 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log index 1446b4a..fef8ac0 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.93 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.07 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.40 ± 4.11 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log index 2b1dc75..e795bb5 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 906.79 ± 8.48 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1039.16 ± 53.94 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.16 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index 6e97883..cacca70 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 158.87 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.05 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 174.67 ± 4.22 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 01df500..3e8cb37 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1180.84 ± 8.60 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.24 ± 0.04 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 823.08 ± 48.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 66.14 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 00d42b3..7e07c93 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.45 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 112.99 ± 0.13 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 27.35 ± 0.07 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 7e7199a..9fd3aa4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1046.73 ± 6.25 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.71 ± 0.14 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1064.73 ± 70.49 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 68.93 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log index edb34ce..f189e87 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 109.86 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 30.94 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.30 ± 3.42 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 34.18 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log index 06add3d..33abf49 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.32 ± 7.42 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.51 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1209.23 ± 7.46 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log index 3022aa0..1e0bd42 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.97 ± 3.98 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.41 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 177.01 ± 5.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.40 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log index 101bd0a..3c417e5 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1096.05 ± 129.46 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.57 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.91 ± 9.78 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log index 54589c8..d871650 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 160.95 ± 3.41 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.56 ± 3.86 | | qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index a7ccbd8..22e9a0f 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1240.19 ± 1.93 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1263.87 ± 7.23 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.78 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log index 4178bdc..0afdf48 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 286.57 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.33 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.20 ± 8.55 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.48 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log index 096ba0e..5e79f7a 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1246.06 ± 12.57 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.95 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1260.69 ± 6.89 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.94 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log index ae4c172..a96e13b 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 211.86 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.37 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.34 ± 7.55 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.52 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log index 1f7c7dd..3b0cc22 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.75 ± 5.62 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.54 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1241.85 ± 15.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.57 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log index ac7f41b..5027639 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.98 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.13 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.98 ± 4.28 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.43 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log index 243d803..70fc750 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1228.38 ± 14.75 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.53 ± 0.03 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1229.55 ± 20.23 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.45 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log index 097c8cf..0b7f117 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.67 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.01 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.47 ± 4.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.44 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index df1dc18..f27f9db 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1072.21 ± 149.58 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 1.52 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 846.24 ± 47.60 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 86.32 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log index 8a11d0b..c8693bb 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.87 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 1.45 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 114.18 ± 0.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 30.07 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index 2539f39..4e7288a 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1075.31 ± 42.44 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 86.69 ± 0.11 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1005.90 ± 6.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 79.55 ± 6.96 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log index 6079ff7..15abf33 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.06 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.81 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 153.83 ± 3.76 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.44 ± 0.05 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log index fd2dcb3..f5ae063 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 211.96 ± 2.48 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.74 ± 0.49 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 193.67 ± 2.12 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.98 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log index e9297f0..d5132fd 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 239.82 ± 0.51 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 24.76 ± 1.87 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.45 ± 0.19 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.87 ± 3.54 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log index fe3532d..d64554b 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 262.57 ± 3.77 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.94 ± 0.05 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 263.91 ± 3.82 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.97 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log index d6b3f2b..206af26 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 295.41 ± 0.37 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.85 ± 3.58 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 296.64 ± 0.50 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.44 ± 4.28 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 1e7106e..a915bc9 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.41 ± 3.59 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.12 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.54 ± 4.38 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.45 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index f0ae3e2..99b98d0 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 421.06 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.55 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 449.68 ± 1.06 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.10 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 41593f2..3d76bbf 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 610.91 ± 4.82 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.22 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.83 ± 4.39 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.75 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 27251b7..c1c013e 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.47 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.56 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 448.82 ± 1.02 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.07 ± 0.35 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index 0b88bef..b89bb7b 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 354.31 ± 5.52 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.40 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 282.60 ± 2.04 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.89 ± 0.20 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 2b3ed00..5d5c0db 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 327.85 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 238.71 ± 0.62 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.93 ± 4.11 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index 3ea4bf5..b8878a5 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 364.74 ± 5.05 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.38 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 590.03 ± 3.05 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.73 ± 0.52 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index f3c68f2..be1cd00 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 340.53 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.05 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 413.78 ± 0.61 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.54 ± 3.30 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index c0635f7..35f7add 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 634.07 ± 4.20 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.94 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 426.39 ± 3.26 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 31.84 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 9e31a3e..9ed3aa4 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 121.89 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 22.94 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 228.41 ± 1.50 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 22.47 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index a95a432..19a2fa7 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 538.47 ± 29.53 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.56 ± 0.11 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 509.22 ± 20.34 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 29.92 ± 0.05 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index c355291..faa295c 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 211.76 ± 0.00 | -| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 370.94 ± 32.12 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 26.00 ± 0.20 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log index a2749f2..a9d4fa7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.22 ± 0.21 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.33 ± 0.27 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log index 9f14bc9..ee21601 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 231.95 ± 3.74 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 232.79 ± 5.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log index 5cc6d4b..9ed3a0f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.04 ± 0.14 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.44 ± 0.31 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log index d9d363a..45ba7e6 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 230.54 ± 3.36 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 229.19 ± 6.79 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index ca8d844..86e7334 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 931.79 ± 1.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 936.69 ± 1.33 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.23 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index f18870e..9364a43 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 247.33 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 258.34 ± 1.81 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.63 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index 7f55d20..2eda1f4 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 936.67 ± 1.30 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 935.37 ± 1.09 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.20 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index 3566962..5f3c340 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 259.06 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.44 ± 5.27 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.62 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log index c78e668..fb87a4a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 428.84 ± 1.18 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 943.63 ± 1.62 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 5337922..14934cf 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 275.24 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.59 ± 26.74 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log index 7e1d970..ba60108 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 431.06 ± 0.61 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.24 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 942.52 ± 1.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index 989d957..878aaa3 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.40 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 376.68 ± 9.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 1c332ff..55932ee 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 602.68 ± 80.42 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 125.50 ± 0.06 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.45 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index aa3f7c3..41a43d8 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 21.40 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.97 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 111.11 ± 0.04 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.40 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 8bb191e..edac157 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 548.18 ± 1.59 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.94 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 687.05 ± 0.75 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.14 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 7cec059..24d4dab 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 231.70 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.11 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 376.92 ± 18.46 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.72 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log index e6e1d62..1ff6f83 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 430.73 ± 1.02 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 3.86 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 463.92 ± 1.19 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log index f7bcf1c..b36f842 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.46 ± 0.50 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.69 ± 0.01 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.32 ± 3.30 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.68 ± 0.11 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log index 02273a2..c643303 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 525.55 ± 1.67 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 528.00 ± 0.44 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log index 76656be..cc34693 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 202.49 ± 3.52 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.02 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 201.67 ± 1.78 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.74 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index 24ad07c..00bcee5 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.15 ± 0.56 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.08 ± 0.85 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index a72de4d..12a07a9 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.44 ± 2.25 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 302fb73..97f96ee 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 531.41 ± 1.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.48 ± 0.88 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index d754100..93404ec 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.27 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 220.03 ± 0.98 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 1c6842e..d8498c8 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.21 ± 1.24 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 549.57 ± 2.42 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index c14e74b..08d310a 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.14 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.98 ± 0.94 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index efdae4b..59af47c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.32 ± 1.23 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 529.01 ± 0.98 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index a4539a4..359ca8d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 193.22 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 204.92 ± 2.92 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index ef8b29d..db92211 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 111.81 ± 20.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.85 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 9.32 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.87 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index e6b8559..7a34429 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.77 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.40 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 9.20 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.60 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index e1a2ff4..858bf96 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.40 ± 0.65 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 123.07 ± 0.27 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.92 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index f1e6c00..767256d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 64.09 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.67 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 82.96 ± 0.72 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.66 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log index 83da529..c50aeb9 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2858.32 ± 17.99 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2870.77 ± 12.89 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.03 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log index 5def9e8..861d2dd 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1182.57 ± 31.53 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1639.03 ± 15.14 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.51 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log index 455b875..7ec3e36 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2679.11 ± 228.92 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 70.08 ± 4.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2807.93 ± 16.33 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.66 ± 0.03 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log index ebaaee2..aba12ed 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1138.90 ± 19.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1262.15 ± 24.34 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.54 ± 0.04 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index c102746..6b110c7 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2884.56 ± 5.24 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.80 ± 0.03 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2891.85 ± 2.60 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.18 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log index eeeb59c..71df791 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1446.85 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.42 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1600.62 ± 30.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.21 ± 0.11 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 2a53d30..98f27b4 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.72 ± 3.55 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.97 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2893.75 ± 3.92 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.15 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log index a78c350..d1578cd 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1258.46 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.59 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1419.18 ± 40.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.24 ± 0.11 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log index e5cb703..fe1a281 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2829.05 ± 14.01 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 82.17 ± 4.20 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2805.65 ± 13.25 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.35 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log index 421c142..ef479c4 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1118.35 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1215.66 ± 10.33 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.02 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log index e8c43cb..da4aac2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.10 ± 21.02 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.76 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2800.57 ± 47.75 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.47 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log index 143a699..e240976 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1123.24 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 61.04 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1214.20 ± 13.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.03 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index 6dcd777..6ebe735 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1514.96 ± 340.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.61 ± 2.29 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 657.19 ± 0.41 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 86.55 ± 0.10 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log index a3d57b9..404a158 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 188.74 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 66.83 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 493.70 ± 0.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 58.57 ± 0.11 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index ac9a8fa..0537089 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1235.50 ± 244.41 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 81.02 ± 2.09 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 1977.82 ± 204.87 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 91.09 ± 3.96 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log index 56ae086..e118216 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 694.43 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.80 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 1149.92 ± 30.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 67.86 ± 0.22 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log index 2299f09..f098ce7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.34 ± 1.51 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.25 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.09 ± 1.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.77 ± 0.73 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log index 6275774..0d8b56a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.76 ± 0.23 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.60 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.33 ± 0.32 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.51 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log index c90c1af..d6ccfa5 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.33 ± 1.42 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.27 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.23 ± 1.39 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.06 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log index c7a144b..9bfd24d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.89 ± 4.16 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.64 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.30 ± 0.10 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.08 ± 4.33 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index 3553f04..a05b582 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 683.09 ± 7.89 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.50 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 681.58 ± 4.94 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 50.85 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 64eaca1..5948f34 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 334.72 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.07 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.76 ± 35.92 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.13 ± 0.05 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 78c2f6b..abd20b5 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 684.21 ± 8.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.99 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 678.97 ± 4.29 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.88 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log index 6b62e44..e3aae6c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 333.73 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.14 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 400.64 ± 35.51 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 37.97 ± 3.34 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log index a0cb708..5365be2 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 674.34 ± 3.87 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.10 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 649.28 ± 39.54 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.00 ± 0.00 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 5cd16ba..c611dc9 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.35 ± 0.56 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.44 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 259.94 ± 5.81 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.30 ± 0.03 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log index 8898523..83fdee3 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 679.62 ± 5.41 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.11 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 666.65 ± 12.50 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.05 ± 0.00 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log index 75ba15a..420ec5d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 352.62 ± 1.18 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.46 ± 0.03 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.35 ± 6.47 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.36 ± 0.01 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 5b42185..4e6b4ce 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 862.37 ± 1.02 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 5.59 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 643.04 ± 39.69 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 54.00 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 2ba5783..394417c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 183.43 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.21 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 195.45 ± 2.65 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.02 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 9676853..40087c2 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 585.93 ± 27.57 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 57.18 ± 0.07 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 597.02 ± 9.82 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 57.38 ± 0.04 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log index de517e6..e8acd38 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 160.38 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 278.37 ± 7.19 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 42.78 ± 0.09 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log index 4b54d27..78c56b0 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 548.07 ± 6.87 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.66 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 547.85 ± 6.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.01 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log index ae41eeb..e4ebb3b 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.62 ± 3.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.32 ± 0.67 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.50 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log index 9aea9e3..bb60d45 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.89 ± 7.03 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.57 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.41 ± 6.71 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log index 9029d6a..f06a549 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.07 ± 4.50 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.65 ± 1.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.58 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index 872dcfb..77a4b05 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1776.02 ± 15.96 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.43 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1779.88 ± 16.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.26 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log index 409ef11..7323d53 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 596.09 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.20 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 583.30 ± 9.18 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.85 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index a92dd8c..537f920 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1777.68 ± 17.27 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.45 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1785.44 ± 15.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.22 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log index 3d98873..9599307 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 546.37 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.26 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 600.15 ± 13.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.78 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log index e0b5cee..adb683c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1734.91 ± 19.75 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.48 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1742.62 ± 12.05 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.51 ± 0.01 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log index e5b1d94..46f28e1 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 557.68 ± 0.88 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.80 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 428.95 ± 5.63 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.82 ± 0.02 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log index 5330505..c06bd1f 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1709.32 ± 28.51 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1730.96 ± 9.70 | | gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log index 52dd5fe..e0f62d9 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 541.93 ± 1.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.84 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 425.86 ± 3.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.90 ± 0.02 | build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index ca81fc1..fd7fc59 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1771.72 ± 240.97 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.95 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1300.97 ± 78.99 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 77.58 ± 0.03 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log index a764fdc..92c9d52 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 294.23 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.42 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 337.80 ± 4.40 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 53.06 ± 0.10 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 65dad4f..5534eab 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1429.10 ± 24.10 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 80.56 ± 0.18 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1397.71 ± 70.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 80.99 ± 0.06 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log index b18d7b4..9f1beb7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 284.79 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 56.04 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 416.91 ± 7.90 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 60.56 ± 0.77 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log index 4834219..4aeb30d 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.23 ± 0.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.62 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.58 ± 0.25 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.55 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log index fa012c7..a671c82 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 139.39 ± 0.75 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.59 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 144.34 ± 1.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log index 58b1ece..8caec46 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.22 ± 0.08 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.67 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 548.97 ± 0.14 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.52 ± 0.02 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log index 1f2b6d2..259fde9 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 137.30 ± 1.39 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 143.70 ± 0.31 | | llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | -build: a14b960bc (7816) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 28fa54f..d34295f 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1595.85 ± 4.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1597.02 ± 1.89 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log index eabd52b..0f41bbb 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.69 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.62 ± 1.29 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.93 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index 97ed387..ed7cd83 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1599.42 ± 4.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.06 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1598.36 ± 1.08 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.02 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log index 8b942a9..e9332b6 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 187.77 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.93 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.18 ± 2.26 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.94 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log index 0008484..dd9ab75 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.91 ± 0.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.72 ± 0.02 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.62 ± 1.92 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.17 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log index 724fd52..8634ecc 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.45 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 210.19 ± 4.57 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log index 55d9338..08f165c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 671.59 ± 0.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.01 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.40 ± 2.71 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.21 ± 0.01 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log index d7a864a..355d6a7 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.15 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.59 ± 0.00 | +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.78 ± 3.22 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index 7655db3..b7dc396 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1261.90 ± 215.63 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 6.26 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 349.95 ± 0.30 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 56.00 ± 0.22 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log index 5a169c2..34caa8e 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.76 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.53 ± 0.11 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 9.29 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index 5548853..e89bb38 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1126.16 ± 2.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.42 ± 0.08 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 1355.55 ± 2.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 55.88 ± 0.13 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log index 697ab8d..2ca9650 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 162.11 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.74 ± 0.00 | +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 246.20 ± 1.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.76 ± 0.00 | -build: 9c142e3a2 (7670) +build: e0c93af2a (7938) diff --git a/benchmark/results/system_info.json b/benchmark/results/system_info.json index d29bc7f..b7fac37 100644 --- a/benchmark/results/system_info.json +++ b/benchmark/results/system_info.json @@ -1,6 +1 @@ -{ - "distro": "Fedora Linux 42 (Workstation Edition)", - "kernel": "6.18.3-100.fc42.x86_64", - "linux_firmware": "linux-firmware-20251111-1.fc42.noarch", - "timestamp": "09 Jan 2026" -} \ No newline at end of file +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.18.5-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", "timestamp": "04 Feb 2026"} diff --git a/docs/results.json b/docs/results.json index e087ac3..54dc9b3 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,25 +1,17 @@ { "meta": { - "generated_at": "2026-02-04T17:41:39Z", + "generated_at": "2026-02-05T19:03:01Z", "system_info": { - "distro": "Fedora Linux 42 (Workstation Edition)", - "kernel": "6.18.3-100.fc42.x86_64", - "linux_firmware": "linux-firmware-20251111-1.fc42.noarch", - "timestamp": "09 Jan 2026" + "distro": "Fedora Linux 43 (Workstation Edition)", + "kernel": "6.18.5-200.fc43.x86_64", + "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", + "timestamp": "04 Feb 2026" }, "llamacpp_builds": [ { "hash": "2656c0d26", "number": "7693" }, - { - "hash": "9c142e3a2", - "number": "7670" - }, - { - "hash": "a14b960bc", - "number": "7816" - }, { "hash": "e0c93af2a", "number": "7938" @@ -34,7 +26,6 @@ "rocm7-nightlies-hblt0", "rocm7.1.1", "rocm7.1.1-hblt0", - "rocm7_rc", "vulkan_amdvlk", "vulkan_radv" ], @@ -51,8 +42,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 66.52, - "tps_std": 7.27, + "tps_mean": 72.93, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -65,8 +56,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -79,8 +70,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.76, - "tps_std": 0.08, + "tps_mean": 2.95, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -93,8 +84,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -108,7 +99,7 @@ "context_tokens": 32768, "test": "pp2048 @ d32768", "tps_mean": 16.57, - "tps_std": 0.05, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -121,8 +112,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -136,7 +127,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 2.08, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -149,8 +140,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -163,8 +154,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 71.47, - "tps_std": 0.2, + "tps_mean": 71.58, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -177,8 +168,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -191,8 +182,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.82, - "tps_std": 0.07, + "tps_mean": 2.99, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -205,8 +196,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -219,7 +210,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 16.47, + "tps_mean": 16.48, "tps_std": 0.03, "error": false, "error_type": null, @@ -233,8 +224,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -247,6 +238,342 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", + "tps_mean": 2.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 79.51, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.72, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.57, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 79.24, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.67, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.53, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 78.28, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.98, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.18, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", "tps_mean": 2.06, "tps_std": 0.02, "error": false, @@ -258,317 +585,12 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 81.03, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 79.43, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.8, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 73.64, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.74, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 123.0, - "quant": "Q4_K_XL", "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, - "build": null + "build": { + "hash": "e0c93af2a", + "number": "7938" + } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", @@ -580,13 +602,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 72.94, - "tps_std": 2.79, + "tps_mean": 80.59, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -594,8 +616,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -608,13 +630,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.16, + "tps_mean": 2.99, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -622,8 +644,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -636,13 +658,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.14, - "tps_std": 0.0, + "tps_mean": 17.27, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -650,8 +672,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -670,7 +692,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -678,232 +700,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 77.29, - "tps_std": 5.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.83, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 76.84, - "tps_std": 4.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.84, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -916,13 +714,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 41.19, - "tps_std": 7.76, + "tps_mean": 17.65, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -930,8 +728,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -944,13 +742,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 1.87, + "tps_mean": 3.0, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -958,8 +756,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -972,13 +770,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 10.37, - "tps_std": 0.0, + "tps_mean": 4.94, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -986,8 +784,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -1000,13 +798,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 1.26, + "tps_mean": 1.69, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -1014,8 +812,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -1028,69 +826,69 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 47.53, + "tps_mean": 54.76, + "tps_std": 11.46, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 7.15, "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.96, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 12.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -1098,8 +896,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -1118,7 +916,7 @@ "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -1126,13 +924,13 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", "env": "rocm-7.2", "env_base": "rocm", "env_variant": "7.2", @@ -1140,1147 +938,923 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 133.48, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.18, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.12, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 181.28, - "tps_std": 1.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.65, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 35.28, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.97, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 279.35, - "tps_std": 0.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 37.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 307.88, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 190.14, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 235.84, - "tps_std": 0.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 279.68, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 37.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 305.29, + "tps_mean": 454.95, "tps_std": 1.9, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.83, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 320.89, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 23.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 281.21, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.02, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", "env": "rocm-7.2", "env_base": "rocm", "env_variant": "7.2", "fa": true, "context": "default", "context_tokens": null, - "test": "pp512", - "tps_mean": 56.12, - "tps_std": 0.13, + "test": "tg128", + "tps_mean": 22.26, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 94.79, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.48, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 425.21, + "tps_std": 1.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.41, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 93.83, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.55, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 407.15, + "tps_std": 2.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.51, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 101.09, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 414.23, + "tps_std": 2.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.11, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.06, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.97, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 489.62, + "tps_std": 3.63, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.4, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 92.48, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 425.86, + "tps_std": 2.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 92.06, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 106.42, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.87, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 333.1, + "tps_std": 6.48, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 78.99, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.13, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 398.34, + "tps_std": 1.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", "env": "rocm-7.2", "env_base": "rocm", "env_variant": "7.2", @@ -2288,27 +1862,923 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.6, + "tps_mean": 35.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 90.22, + "tps_std": 4.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.35, "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 398.87, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 36.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 92.13, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.56, + "tps_std": 1.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 947.86, + "tps_std": 2.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 33.77, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 84.85, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.89, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 952.84, + "tps_std": 2.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 35.23, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 84.01, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.97, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 983.72, + "tps_std": 3.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 36.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 80.32, + "tps_std": 1.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.31, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 955.1, + "tps_std": 4.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 36.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 81.34, + "tps_std": 1.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.32, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 368.78, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 40.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 6.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 877.18, + "tps_std": 8.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 40.07, + "tps_std": 0.78, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 90.27, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.07, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 48.83, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", "env": "rocm-7.2", "env_base": "rocm", "env_variant": "7.2", @@ -2316,27 +2786,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 22.87, - "tps_std": 0.07, + "tps_mean": 29.25, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", "env": "rocm-7.2", "env_base": "rocm", "env_variant": "7.2", @@ -2344,559 +2814,559 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.89, + "tps_mean": 2.45, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 49.38, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 29.17, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 146.04, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 36.22, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.43, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 146.83, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 36.4, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 155.06, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 38.36, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.46, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 151.7, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 38.35, "tps_std": 0.67, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 105.5, - "tps_std": 0.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.93, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.24, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 151.37, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 31.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 195.36, - "tps_std": 1.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 88.58, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 30.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 152.92, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 35.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -2904,595 +3374,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.64, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 188.13, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 290.49, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 17.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 261.76, - "tps_std": 0.99, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 17.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 49.5, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 27.87, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.42, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 49.27, + "tps_mean": 2.46, "tps_std": 0.02, "error": false, "error_type": null, @@ -3503,767 +3385,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 28.46, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.44, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 148.26, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 147.21, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 67.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 26.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 66.64, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 26.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 148.44, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.61, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4276,13 +3402,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 100.89, - "tps_std": 0.24, + "tps_mean": 21.74, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4290,8 +3416,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4310,7 +3436,7 @@ "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4318,8 +3444,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4332,13 +3458,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 18.12, - "tps_std": 0.0, + "tps_mean": 8.35, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4346,8 +3472,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4360,13 +3486,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.16, + "tps_mean": 2.36, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4374,8 +3500,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4388,13 +3514,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 87.66, - "tps_std": 0.55, + "tps_mean": 99.39, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4402,8 +3528,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4416,13 +3542,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, + "tps_mean": 2.76, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4430,8 +3556,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4444,13 +3570,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 21.96, - "tps_std": 0.0, + "tps_mean": 11.79, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4458,8 +3584,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4472,13 +3598,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.39, - "tps_std": 0.0, + "tps_mean": 2.44, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 70.55, "file_size_gib": 75.65, "name_params_b": 70.55, @@ -4486,8 +3612,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4500,8 +3626,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 797.13, - "tps_std": 2.39, + "tps_mean": 800.17, + "tps_std": 1.72, "error": false, "error_type": null, "backend": "ROCm", @@ -4514,8 +3640,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4542,8 +3668,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4556,8 +3682,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 146.47, - "tps_std": 5.52, + "tps_mean": 169.18, + "tps_std": 1.16, "error": false, "error_type": null, "backend": "ROCm", @@ -4570,8 +3696,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4598,8 +3724,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4612,8 +3738,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 803.39, - "tps_std": 2.22, + "tps_mean": 803.22, + "tps_std": 2.21, "error": false, "error_type": null, "backend": "ROCm", @@ -4626,8 +3752,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4654,8 +3780,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4668,8 +3794,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 152.56, - "tps_std": 6.51, + "tps_mean": 170.11, + "tps_std": 0.81, "error": false, "error_type": null, "backend": "ROCm", @@ -4682,8 +3808,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4710,8 +3836,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4724,13 +3850,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 801.73, - "tps_std": 2.77, + "tps_mean": 764.18, + "tps_std": 1.66, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4738,8 +3864,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4752,13 +3878,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 8.49, + "tps_mean": 8.48, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4766,8 +3892,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4780,13 +3906,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 163.31, - "tps_std": 0.0, + "tps_mean": 166.22, + "tps_std": 1.2, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4794,8 +3920,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4814,7 +3940,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4822,8 +3948,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4836,13 +3962,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 805.52, - "tps_std": 3.18, + "tps_mean": 766.68, + "tps_std": 1.07, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4850,8 +3976,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4870,7 +3996,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4878,8 +4004,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4892,13 +4018,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 164.32, - "tps_std": 0.0, + "tps_mean": 164.84, + "tps_std": 1.99, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4906,8 +4032,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4926,7 +4052,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 13.51, "file_size_gib": 25.16, "name_params_b": 13.51, @@ -4934,8 +4060,8 @@ "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -4948,1259 +4074,139 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 798.6, - "tps_std": 3.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 153.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 799.84, - "tps_std": 4.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 159.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 792.78, - "tps_std": 1.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 156.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 803.71, - "tps_std": 3.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 163.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 187.83, - "tps_std": 22.96, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 64.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 195.84, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 7.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 75.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 393.57, - "tps_std": 2.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.69, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 559.16, - "tps_std": 0.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.74, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 389.1, - "tps_std": 3.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 558.52, - "tps_std": 1.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.73, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1065.39, - "tps_std": 1.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.12, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 823.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1034.18, - "tps_std": 3.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 896.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 567.35, - "tps_std": 4.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 660.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 560.67, + "tps_mean": 990.88, "tps_std": 3.15, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 172.42, + "tps_std": 3.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 799.71, + "tps_std": 2.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6208,27 +4214,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 41.63, + "tps_mean": 8.49, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6236,27 +4242,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 663.35, - "tps_std": 0.0, + "tps_mean": 170.19, + "tps_std": 1.69, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6264,251 +4270,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 39.44, + "tps_mean": 7.1, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1051.12, - "tps_std": 10.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 704.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1028.01, - "tps_std": 11.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 743.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6516,27 +4298,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1253.52, - "tps_std": 10.26, + "tps_mean": 19.7, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6544,27 +4326,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 47.03, + "tps_mean": 8.24, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6572,27 +4354,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 408.37, - "tps_std": 0.0, + "tps_mean": 16.69, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6600,27 +4382,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.93, + "tps_mean": 6.41, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6628,27 +4410,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1016.39, - "tps_std": 35.31, + "tps_mean": 222.01, + "tps_std": 0.94, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6656,27 +4438,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 46.53, - "tps_std": 0.03, + "tps_mean": 7.59, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6684,41 +4466,909 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 403.09, + "tps_mean": 76.47, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.39, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 393.61, + "tps_std": 2.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 562.85, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 388.54, + "tps_std": 2.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.61, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 564.71, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", "env_variant": null, "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1026.87, + "tps_std": 6.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1042.36, + "tps_std": 2.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.08, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1027.41, + "tps_std": 6.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1038.86, + "tps_std": 3.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1070.15, + "tps_std": 5.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 908.79, + "tps_std": 27.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 40.91, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1038.67, + "tps_std": 2.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 42.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 821.93, + "tps_std": 29.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 676.59, + "tps_std": 50.83, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 47.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 342.52, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.25, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 951.76, + "tps_std": 41.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.68, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 799.39, + "tps_std": 0.69, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, "params_b": 31.58, "file_size_gib": 37.66, "name_params_b": 31.58, @@ -6726,8 +5376,36 @@ "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 41.15, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -6740,8 +5418,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 178.32, - "tps_std": 26.83, + "tps_mean": 202.36, + "tps_std": 3.5, "error": false, "error_type": null, "backend": "ROCm", @@ -6754,8 +5432,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -6768,8 +5446,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 12.97, - "tps_std": 0.98, + "tps_mean": 15.8, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -6782,8 +5460,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -6796,7 +5474,567 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 38.11, + "tps_mean": 41.36, + "tps_std": 0.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.65, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 200.1, + "tps_std": 8.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.04, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 41.53, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.5, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 205.05, + "tps_std": 3.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.98, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 51.11, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.63, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 203.41, + "tps_std": 3.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.0, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 51.19, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.58, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 200.04, + "tps_std": 4.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.19, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 41.42, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.94, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 197.48, + "tps_std": 10.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.2, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 41.6, "tps_std": 0.36, "error": false, "error_type": null, @@ -6807,543 +6045,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 167.63, - "tps_std": 28.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.73, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 37.92, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.78, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 206.6, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 47.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 197.06, - "tps_std": 14.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 47.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 202.07, - "tps_std": 3.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 188.66, - "tps_std": 20.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 13.61, - "tps_std": 1.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7355,14 +6061,14 @@ "fa": true, "context": "longctx32768", "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.43, - "tps_std": 0.0, + "test": "tg32 @ d32768", + "tps_mean": 10.81, + "tps_std": 0.61, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7370,248 +6076,27 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 187.93, - "tps_std": 19.38, + "tps_mean": 119.82, + "tps_std": 3.3, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.7, - "tps_std": 1.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 17.92, - "tps_std": 2.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 7.99, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 57.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 168.14, - "tps_std": 0.52, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7619,8 +6104,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7633,13 +6118,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.08, - "tps_std": 0.0, + "tps_mean": 17.75, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7647,8 +6132,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7661,13 +6146,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.62, - "tps_std": 0.0, + "tps_mean": 27.41, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7675,8 +6160,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7689,13 +6174,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 1.39, + "tps_mean": 3.42, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7703,8 +6188,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7717,13 +6202,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 146.89, - "tps_std": 0.98, + "tps_mean": 133.28, + "tps_std": 1.45, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7731,8 +6216,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7745,13 +6230,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 18.09, - "tps_std": 0.0, + "tps_mean": 15.98, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7759,8 +6244,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7773,13 +6258,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 22.66, - "tps_std": 0.0, + "tps_mean": 30.79, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7787,8 +6272,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7801,13 +6286,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.52, + "tps_mean": 6.5, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 235.09, "file_size_gib": 96.99, "name_params_b": 235.09, @@ -7815,8 +6300,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7829,8 +6314,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 484.09, - "tps_std": 10.61, + "tps_mean": 489.11, + "tps_std": 2.88, "error": false, "error_type": null, "backend": "ROCm", @@ -7843,8 +6328,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7857,7 +6342,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.12, + "tps_mean": 27.18, "tps_std": 0.16, "error": false, "error_type": null, @@ -7871,8 +6356,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7885,8 +6370,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 162.38, - "tps_std": 4.2, + "tps_mean": 214.97, + "tps_std": 1.13, "error": false, "error_type": null, "backend": "ROCm", @@ -7899,8 +6384,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7913,8 +6398,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 18.71, - "tps_std": 1.16, + "tps_mean": 18.57, + "tps_std": 1.4, "error": false, "error_type": null, "backend": "ROCm", @@ -7927,8 +6412,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7941,8 +6426,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 492.54, - "tps_std": 2.48, + "tps_mean": 492.32, + "tps_std": 2.55, "error": false, "error_type": null, "backend": "ROCm", @@ -7955,8 +6440,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7969,8 +6454,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.09, - "tps_std": 0.01, + "tps_mean": 27.23, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -7983,8 +6468,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -7997,8 +6482,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 161.72, - "tps_std": 4.9, + "tps_mean": 207.64, + "tps_std": 0.55, "error": false, "error_type": null, "backend": "ROCm", @@ -8011,8 +6496,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8025,8 +6510,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.36, - "tps_std": 0.0, + "tps_mean": 18.84, + "tps_std": 0.97, "error": false, "error_type": null, "backend": "ROCm", @@ -8039,8 +6524,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8053,13 +6538,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 576.12, - "tps_std": 2.09, + "tps_mean": 576.03, + "tps_std": 3.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8067,8 +6552,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8081,13 +6566,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 26.83, + "tps_mean": 26.12, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8095,8 +6580,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8109,13 +6594,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 254.34, - "tps_std": 0.0, + "tps_mean": 249.94, + "tps_std": 1.13, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8123,8 +6608,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8137,125 +6622,97 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 585.67, - "tps_std": 2.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 244.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", "tps_mean": 19.18, - "tps_std": 0.0, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 569.42, + "tps_std": 8.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 250.24, + "tps_std": 0.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8263,8 +6720,36 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.77, + "tps_std": 0.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8277,13 +6762,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 492.51, - "tps_std": 1.28, + "tps_mean": 512.1, + "tps_std": 4.69, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8291,8 +6776,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8305,13 +6790,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.04, - "tps_std": 0.0, + "tps_mean": 27.27, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8319,8 +6804,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8333,13 +6818,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 203.91, - "tps_std": 0.0, + "tps_mean": 216.18, + "tps_std": 0.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8347,8 +6832,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8361,13 +6846,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.28, + "tps_mean": 19.71, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8375,8 +6860,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8389,13 +6874,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 494.46, - "tps_std": 2.69, + "tps_mean": 493.72, + "tps_std": 3.45, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8403,8 +6888,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8417,13 +6902,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.13, + "tps_mean": 27.32, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8431,8 +6916,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8445,13 +6930,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 173.11, - "tps_std": 0.0, + "tps_mean": 209.02, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8459,8 +6944,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8473,246 +6958,22 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 469.57, - "tps_std": 3.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 173.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 471.12, - "tps_std": 8.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.02, + "tps_mean": 19.67, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 172.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8725,13 +6986,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 424.44, - "tps_std": 1.61, + "tps_mean": 168.95, + "tps_std": 7.69, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8739,8 +7000,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8759,7 +7020,7 @@ "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8767,8 +7028,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8781,13 +7042,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 65.51, - "tps_std": 0.0, + "tps_mean": 75.04, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8795,8 +7056,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8809,13 +7070,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.05, - "tps_std": 0.0, + "tps_mean": 8.68, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8823,8 +7084,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8837,13 +7098,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 364.62, - "tps_std": 2.62, + "tps_mean": 351.97, + "tps_std": 2.56, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8851,8 +7112,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8865,13 +7126,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 9.49, - "tps_std": 0.0, + "tps_mean": 9.42, + "tps_std": 0.21, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8879,8 +7140,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8893,13 +7154,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 93.65, - "tps_std": 0.0, + "tps_mean": 127.67, + "tps_std": 0.45, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8907,8 +7168,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8921,13 +7182,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.14, - "tps_std": 0.0, + "tps_mean": 8.31, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 56.89, "name_params_b": 30.53, @@ -8935,8 +7196,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8949,8 +7210,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 813.78, - "tps_std": 5.52, + "tps_mean": 815.37, + "tps_std": 5.82, "error": false, "error_type": null, "backend": "ROCm", @@ -8963,8 +7224,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -8977,6 +7238,118 @@ "context": "default", "context_tokens": null, "test": "tg128", + "tps_mean": 58.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 170.56, + "tps_std": 4.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.3, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 811.39, + "tps_std": 6.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", "tps_mean": 58.57, "tps_std": 0.01, "error": false, @@ -8988,123 +7361,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 154.84, - "tps_std": 3.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.32, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 789.1, - "tps_std": 47.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9117,8 +7378,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 155.23, - "tps_std": 3.28, + "tps_mean": 171.54, + "tps_std": 4.45, "error": false, "error_type": null, "backend": "ROCm", @@ -9131,8 +7392,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9145,7 +7406,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 31.27, + "tps_mean": 31.29, "tps_std": 0.01, "error": false, "error_type": null, @@ -9159,8 +7420,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9173,13 +7434,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1073.1, - "tps_std": 11.76, + "tps_mean": 1078.99, + "tps_std": 11.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, @@ -9187,8 +7448,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9201,13 +7462,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.78, - "tps_std": 0.02, + "tps_mean": 56.45, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, @@ -9215,8 +7476,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9229,13 +7490,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 206.02, - "tps_std": 0.0, + "tps_mean": 217.17, + "tps_std": 8.71, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, @@ -9243,8 +7504,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -9257,797 +7518,545 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 31.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1075.09, - "tps_std": 15.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.72, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 904.96, - "tps_std": 12.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 158.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 906.79, - "tps_std": 8.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 158.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 999.37, - "tps_std": 14.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1004.04, - "tps_std": 12.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 168.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1180.84, - "tps_std": 8.6, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 66.24, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 71.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1046.73, - "tps_std": 6.25, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.71, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", "tps_mean": 30.94, - "tps_std": 0.0, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1080.52, + "tps_std": 10.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.49, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 218.42, + "tps_std": 7.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.96, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1056.78, + "tps_std": 36.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 59.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 175.4, + "tps_std": 4.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1039.16, + "tps_std": 53.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 59.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 174.67, + "tps_std": 4.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.98, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 823.08, + "tps_std": 48.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 66.14, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 112.99, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.35, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1064.73, + "tps_std": 70.49, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.93, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 152.3, + "tps_std": 3.42, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, "params_b": 30.53, "file_size_gib": 24.53, "name_params_b": 30.53, @@ -10055,8 +8064,36 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.18, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10069,8 +8106,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1207.32, - "tps_std": 7.42, + "tps_mean": 1209.23, + "tps_std": 7.46, "error": false, "error_type": null, "backend": "ROCm", @@ -10083,8 +8120,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10097,7 +8134,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.51, + "tps_mean": 71.48, "tps_std": 0.01, "error": false, "error_type": null, @@ -10111,8 +8148,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10125,8 +8162,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 161.97, - "tps_std": 3.98, + "tps_mean": 177.01, + "tps_std": 5.01, "error": false, "error_type": null, "backend": "ROCm", @@ -10139,8 +8176,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10153,7 +8190,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.41, + "tps_mean": 34.4, "tps_std": 0.01, "error": false, "error_type": null, @@ -10167,8 +8204,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10181,8 +8218,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1096.05, - "tps_std": 129.46, + "tps_mean": 1207.91, + "tps_std": 9.78, "error": false, "error_type": null, "backend": "ROCm", @@ -10195,8 +8232,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10209,7 +8246,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.57, + "tps_mean": 71.48, "tps_std": 0.01, "error": false, "error_type": null, @@ -10223,8 +8260,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10237,8 +8274,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 160.95, - "tps_std": 3.41, + "tps_mean": 175.56, + "tps_std": 3.86, "error": false, "error_type": null, "backend": "ROCm", @@ -10251,8 +8288,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10279,8 +8316,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10293,13 +8330,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1240.19, - "tps_std": 1.93, + "tps_mean": 1263.87, + "tps_std": 7.23, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10307,8 +8344,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10321,13 +8358,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 69.06, + "tps_mean": 68.78, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10335,8 +8372,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10349,13 +8386,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 286.57, - "tps_std": 0.0, + "tps_mean": 222.2, + "tps_std": 8.55, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10363,8 +8400,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10377,13 +8414,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.33, - "tps_std": 0.0, + "tps_mean": 33.48, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10391,8 +8428,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10405,13 +8442,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1246.06, - "tps_std": 12.57, + "tps_mean": 1260.69, + "tps_std": 6.89, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10419,8 +8456,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10433,13 +8470,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 68.95, - "tps_std": 0.01, + "tps_mean": 68.94, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10447,8 +8484,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10461,13 +8498,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 211.86, - "tps_std": 0.0, + "tps_mean": 222.34, + "tps_std": 7.55, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10475,8 +8512,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10489,69 +8526,41 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1225.75, - "tps_std": 5.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 71.54, + "tps_mean": 33.52, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1241.85, + "tps_std": 15.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10559,8 +8568,36 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10573,13 +8610,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 163.98, - "tps_std": 0.0, + "tps_mean": 176.98, + "tps_std": 4.28, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10587,8 +8624,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10601,13 +8638,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.13, - "tps_std": 0.0, + "tps_mean": 35.43, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10615,8 +8652,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10629,13 +8666,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1228.38, - "tps_std": 14.75, + "tps_mean": 1229.55, + "tps_std": 20.23, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10643,8 +8680,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10657,13 +8694,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.53, - "tps_std": 0.03, + "tps_mean": 72.45, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10671,8 +8708,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10685,13 +8722,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 165.67, - "tps_std": 0.0, + "tps_mean": 176.47, + "tps_std": 4.18, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10699,8 +8736,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -10713,13 +8750,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 34.01, - "tps_std": 0.0, + "tps_mean": 35.44, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10727,251 +8764,55 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1224.8, - "tps_std": 12.64, + "tps_mean": 846.24, + "tps_std": 47.6, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 69.07, + "tps_mean": 86.32, "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 171.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1228.44, - "tps_std": 13.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.99, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 172.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1072.21, - "tps_std": 149.58, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -10979,36 +8820,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 1.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11021,13 +8834,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 71.87, - "tps_std": 0.0, + "tps_mean": 114.18, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -11035,8 +8848,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11049,13 +8862,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 1.45, - "tps_std": 0.0, + "tps_mean": 30.07, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -11063,8 +8876,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11077,13 +8890,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1075.31, - "tps_std": 42.44, + "tps_mean": 1005.9, + "tps_std": 6.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -11091,8 +8904,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11105,13 +8918,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 86.69, - "tps_std": 0.11, + "tps_mean": 79.55, + "tps_std": 6.96, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -11119,8 +8932,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11133,13 +8946,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 111.06, - "tps_std": 0.0, + "tps_mean": 153.83, + "tps_std": 3.76, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 30.53, "file_size_gib": 17.35, "name_params_b": 30.53, @@ -11147,8 +8960,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11161,178 +8974,150 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 211.96, - "tps_std": 2.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.74, - "tps_std": 0.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 239.82, - "tps_std": 0.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 24.76, - "tps_std": 1.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 262.57, - "tps_std": 3.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.94, + "tps_mean": 37.44, "tps_std": 0.05, "error": false, "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 193.67, + "tps_std": 2.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 217.45, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.87, + "tps_std": 3.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 263.91, + "tps_std": 3.82, + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, @@ -11343,8 +9128,8 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11354,11 +9139,11 @@ "env_base": "rocm", "env_variant": "7.2-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 295.41, - "tps_std": 0.37, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.97, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -11368,11 +9153,11 @@ "file_size_gib": 86.68, "name_params_b": 79.67, "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -11384,2114 +9169,1694 @@ "fa": true, "context": "longctx32768", "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 23.85, - "tps_std": 3.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 587.41, - "tps_std": 3.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 421.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 610.91, - "tps_std": 4.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 432.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 354.31, - "tps_std": 5.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 327.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 364.74, - "tps_std": 5.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 340.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 586.56, - "tps_std": 7.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 403.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 609.81, - "tps_std": 7.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.84, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 411.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 634.07, - "tps_std": 4.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 33.94, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 121.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 538.47, - "tps_std": 29.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.56, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 211.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 323.22, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 231.95, - "tps_std": 3.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 324.04, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 230.54, - "tps_std": 3.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7.2-hblt0", - "env_base": "rocm", - "env_variant": "7.2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 931.79, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 247.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 936.67, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 259.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 428.84, - "tps_std": 1.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 275.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 431.06, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 283.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 926.89, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 262.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 934.3, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 254.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 602.68, - "tps_std": 80.42, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 21.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 548.18, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 13.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 231.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 430.73, - "tps_std": 1.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 193.46, + "tps_mean": 296.64, "tps_std": 0.5, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.44, + "tps_std": 4.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 592.54, + "tps_std": 4.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.45, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 449.68, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.1, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 592.83, + "tps_std": 4.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 448.82, + "tps_std": 1.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.07, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 282.6, + "tps_std": 2.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.89, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 238.71, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.93, + "tps_std": 4.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 590.03, + "tps_std": 3.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.73, + "tps_std": 0.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 413.78, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.54, + "tps_std": 3.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 426.39, + "tps_std": 3.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 228.41, + "tps_std": 1.5, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.47, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 509.22, + "tps_std": 20.34, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.92, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 370.94, + "tps_std": 32.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.0, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 323.33, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 232.79, + "tps_std": 5.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 324.44, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 229.19, + "tps_std": 6.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 936.69, + "tps_std": 1.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 258.34, + "tps_std": 1.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 935.37, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.2, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 261.44, + "tps_std": 5.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 943.63, + "tps_std": 1.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 396.59, + "tps_std": 26.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 942.52, + "tps_std": 1.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 376.68, + "tps_std": 9.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 125.5, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 111.11, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 687.05, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 376.92, + "tps_std": 18.46, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 463.92, + "tps_std": 1.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 191.32, + "tps_std": 3.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13499,8 +10864,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13513,8 +10878,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.69, - "tps_std": 0.01, + "tps_mean": 3.68, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", @@ -13527,8 +10892,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13541,8 +10906,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 525.55, - "tps_std": 1.67, + "tps_mean": 528.0, + "tps_std": 0.44, "error": false, "error_type": null, "backend": "ROCm", @@ -13555,8 +10920,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13569,7 +10934,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.0, + "tps_mean": 4.02, "tps_std": 0.0, "error": false, "error_type": null, @@ -13583,8 +10948,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13597,8 +10962,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 202.49, - "tps_std": 3.52, + "tps_mean": 201.67, + "tps_std": 1.78, "error": false, "error_type": null, "backend": "ROCm", @@ -13611,8 +10976,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13625,8 +10990,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.72, - "tps_std": 0.02, + "tps_mean": 3.74, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -13639,8 +11004,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13653,13 +11018,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 472.15, - "tps_std": 0.56, + "tps_mean": 508.08, + "tps_std": 0.85, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13667,8 +11032,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13687,7 +11052,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13695,8 +11060,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13709,13 +11074,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 188.56, - "tps_std": 0.0, + "tps_mean": 222.44, + "tps_std": 2.25, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13723,8 +11088,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13743,7 +11108,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13751,8 +11116,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13765,13 +11130,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 531.41, - "tps_std": 1.0, + "tps_mean": 508.48, + "tps_std": 0.88, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13779,8 +11144,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13799,7 +11164,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13807,8 +11172,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13821,13 +11186,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 214.27, - "tps_std": 0.0, + "tps_mean": 220.03, + "tps_std": 0.98, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13835,8 +11200,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13855,7 +11220,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13863,8 +11228,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13877,13 +11242,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 470.21, - "tps_std": 1.24, + "tps_mean": 549.57, + "tps_std": 2.42, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13891,8 +11256,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13905,13 +11270,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.01, + "tps_mean": 4.02, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13919,8 +11284,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13933,13 +11298,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 179.14, - "tps_std": 0.0, + "tps_mean": 215.98, + "tps_std": 0.94, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13947,8 +11312,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13967,7 +11332,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -13975,8 +11340,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -13989,13 +11354,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 526.32, - "tps_std": 1.23, + "tps_mean": 529.01, + "tps_std": 0.98, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14003,8 +11368,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14023,7 +11388,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14031,8 +11396,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14045,13 +11410,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 193.22, - "tps_std": 0.0, + "tps_mean": 204.92, + "tps_std": 2.92, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14059,8 +11424,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14079,7 +11444,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14087,276 +11452,27 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 467.63, - "tps_std": 1.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 9.32, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 182.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 527.37, - "tps_std": 1.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": false, - "error_type": null, - "backend": null, - "ngl": null, "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 111.81, - "tps_std": 20.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14364,8 +11480,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14378,13 +11494,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.85, + "tps_mean": 3.87, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14392,8 +11508,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14406,13 +11522,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 73.77, + "tps_mean": 9.2, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14420,8 +11536,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14434,13 +11550,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.4, + "tps_mean": 3.6, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14448,8 +11564,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14462,13 +11578,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 107.4, - "tps_std": 0.65, + "tps_mean": 123.07, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14476,8 +11592,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14496,7 +11612,7 @@ "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14504,8 +11620,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14518,13 +11634,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 64.09, - "tps_std": 0.0, + "tps_mean": 82.96, + "tps_std": 0.72, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14532,8 +11648,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14546,13 +11662,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.67, + "tps_mean": 3.66, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -14560,8 +11676,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14574,8 +11690,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2858.32, - "tps_std": 17.99, + "tps_mean": 2870.77, + "tps_std": 12.89, "error": false, "error_type": null, "backend": "ROCm", @@ -14588,8 +11704,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14603,7 +11719,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 84.57, - "tps_std": 0.01, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -14616,8 +11732,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14630,8 +11746,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1182.57, - "tps_std": 31.53, + "tps_mean": 1639.03, + "tps_std": 15.14, "error": false, "error_type": null, "backend": "ROCm", @@ -14644,8 +11760,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14658,8 +11774,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 61.59, - "tps_std": 0.02, + "tps_mean": 61.51, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -14672,8 +11788,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14686,8 +11802,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2679.11, - "tps_std": 228.92, + "tps_mean": 2807.93, + "tps_std": 16.33, "error": false, "error_type": null, "backend": "ROCm", @@ -14700,8 +11816,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14714,8 +11830,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 70.08, - "tps_std": 4.2, + "tps_mean": 84.66, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -14728,8 +11844,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14742,8 +11858,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1138.9, - "tps_std": 19.11, + "tps_mean": 1262.15, + "tps_std": 24.34, "error": false, "error_type": null, "backend": "ROCm", @@ -14756,8 +11872,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14770,7 +11886,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 61.59, + "tps_mean": 61.54, "tps_std": 0.04, "error": false, "error_type": null, @@ -14784,8 +11900,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14798,13 +11914,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2884.56, - "tps_std": 5.24, + "tps_mean": 2891.85, + "tps_std": 2.6, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -14812,8 +11928,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -14826,265 +11942,293 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 80.8, + "tps_mean": 82.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1600.62, + "tps_std": 30.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 60.21, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2893.75, + "tps_std": 3.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 82.15, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1419.18, + "tps_std": 40.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 60.24, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2805.65, + "tps_std": 13.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 85.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1215.66, + "tps_std": 10.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 62.02, "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1446.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2874.72, - "tps_std": 3.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 80.97, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1258.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2829.05, - "tps_std": 14.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.17, - "tps_std": 4.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1118.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15092,36 +12236,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 61.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15134,13 +12250,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2843.1, - "tps_std": 21.02, + "tps_mean": 2800.57, + "tps_std": 47.75, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15148,8 +12264,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15162,13 +12278,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 84.76, + "tps_mean": 85.47, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15176,8 +12292,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15190,13 +12306,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 1123.24, - "tps_std": 0.0, + "tps_mean": 1214.2, + "tps_std": 13.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15204,8 +12320,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15218,13 +12334,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 61.04, - "tps_std": 0.0, + "tps_mean": 62.03, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15232,232 +12348,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2657.05, - "tps_std": 331.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 62.0, - "tps_std": 3.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1124.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2818.77, - "tps_std": 65.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 65.67, - "tps_std": 4.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1136.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15470,13 +12362,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1514.96, - "tps_std": 340.21, + "tps_mean": 657.19, + "tps_std": 0.41, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15484,8 +12376,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15498,13 +12390,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 81.61, - "tps_std": 2.29, + "tps_mean": 86.55, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15512,8 +12404,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15526,13 +12418,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 188.74, - "tps_std": 0.0, + "tps_mean": 493.7, + "tps_std": 0.98, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15540,8 +12432,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15554,13 +12446,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 66.83, - "tps_std": 0.0, + "tps_mean": 58.57, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15568,8 +12460,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15582,13 +12474,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1235.5, - "tps_std": 244.41, + "tps_mean": 1977.82, + "tps_std": 204.87, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15596,8 +12488,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15610,13 +12502,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 81.02, - "tps_std": 2.09, + "tps_mean": 91.09, + "tps_std": 3.96, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15624,8 +12516,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15638,13 +12530,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 694.43, - "tps_std": 0.0, + "tps_mean": 1149.92, + "tps_std": 30.21, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15652,8 +12544,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15666,13 +12558,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 49.8, - "tps_std": 0.0, + "tps_mean": 67.86, + "tps_std": 0.22, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 3.88, "file_size_gib": 1.8, "name_params_b": 3.88, @@ -15680,8 +12572,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15694,8 +12586,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 181.34, - "tps_std": 1.51, + "tps_mean": 181.09, + "tps_std": 1.36, "error": false, "error_type": null, "backend": "ROCm", @@ -15708,8 +12600,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15722,8 +12614,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.25, - "tps_std": 0.01, + "tps_mean": 51.77, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", @@ -15736,8 +12628,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15750,8 +12642,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 217.76, - "tps_std": 0.23, + "tps_mean": 217.33, + "tps_std": 0.32, "error": false, "error_type": null, "backend": "ROCm", @@ -15764,8 +12656,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15778,8 +12670,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 36.6, - "tps_std": 0.02, + "tps_mean": 36.51, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -15792,8 +12684,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15806,8 +12698,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 181.33, - "tps_std": 1.42, + "tps_mean": 181.23, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "ROCm", @@ -15820,8 +12712,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15834,7 +12726,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.27, + "tps_mean": 52.06, "tps_std": 0.01, "error": false, "error_type": null, @@ -15848,8 +12740,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15862,8 +12754,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 215.89, - "tps_std": 4.16, + "tps_mean": 218.3, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -15876,8 +12768,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15890,8 +12782,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 36.64, - "tps_std": 0.02, + "tps_mean": 34.08, + "tps_std": 4.33, "error": false, "error_type": null, "backend": "ROCm", @@ -15904,8 +12796,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15918,13 +12810,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 683.09, - "tps_std": 7.89, + "tps_mean": 681.58, + "tps_std": 4.94, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -15932,8 +12824,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15946,13 +12838,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.5, + "tps_mean": 50.85, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -15960,8 +12852,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -15974,13 +12866,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 334.72, - "tps_std": 0.0, + "tps_mean": 396.76, + "tps_std": 35.92, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -15988,8 +12880,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16002,13 +12894,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.07, - "tps_std": 0.0, + "tps_mean": 40.13, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16016,8 +12908,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16030,13 +12922,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 684.21, - "tps_std": 8.3, + "tps_mean": 678.97, + "tps_std": 4.29, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16044,8 +12936,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16058,13 +12950,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.99, - "tps_std": 0.0, + "tps_mean": 51.88, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16072,8 +12964,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16086,13 +12978,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 333.73, - "tps_std": 0.0, + "tps_mean": 400.64, + "tps_std": 35.51, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16100,8 +12992,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16114,13 +13006,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.14, - "tps_std": 0.0, + "tps_mean": 37.97, + "tps_std": 3.34, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16128,8 +13020,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16142,8 +13034,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 674.34, - "tps_std": 3.87, + "tps_mean": 649.28, + "tps_std": 39.54, "error": false, "error_type": null, "backend": "ROCm", @@ -16170,7 +13062,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.1, + "tps_mean": 52.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -16198,8 +13090,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 352.35, - "tps_std": 0.56, + "tps_mean": 259.94, + "tps_std": 5.81, "error": false, "error_type": null, "backend": "ROCm", @@ -16226,119 +13118,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.44, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 679.62, - "tps_std": 5.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 52.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 352.62, - "tps_std": 1.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.46, + "tps_mean": 40.3, "tps_std": 0.03, "error": false, "error_type": null, @@ -16349,6 +13129,90 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 666.65, + "tps_std": 12.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 261.35, + "tps_std": 6.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { @@ -16359,225 +13223,29 @@ { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 675.95, - "tps_std": 6.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.15, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.36, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 257.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 667.48, - "tps_std": 9.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 255.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16590,13 +13258,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 862.37, - "tps_std": 1.02, + "tps_mean": 643.04, + "tps_std": 39.69, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16604,8 +13272,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16618,13 +13286,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 5.59, - "tps_std": 0.0, + "tps_mean": 54.0, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16632,8 +13300,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16646,13 +13314,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 183.43, - "tps_std": 0.0, + "tps_mean": 195.45, + "tps_std": 2.65, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16660,8 +13328,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16674,13 +13342,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.21, - "tps_std": 0.0, + "tps_mean": 37.02, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16688,8 +13356,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16702,13 +13370,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 585.93, - "tps_std": 27.57, + "tps_mean": 597.02, + "tps_std": 9.82, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16716,8 +13384,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16730,13 +13398,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.18, - "tps_std": 0.07, + "tps_mean": 57.38, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16744,8 +13412,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16758,13 +13426,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 160.38, - "tps_std": 0.0, + "tps_mean": 278.37, + "tps_std": 7.19, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16772,8 +13440,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16786,13 +13454,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 38.7, - "tps_std": 0.0, + "tps_mean": 42.78, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 116.83, "file_size_gib": 59.02, "name_params_b": 116.83, @@ -16800,8 +13468,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16814,8 +13482,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 548.07, - "tps_std": 6.87, + "tps_mean": 547.85, + "tps_std": 6.58, "error": false, "error_type": null, "backend": "ROCm", @@ -16828,8 +13496,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16842,7 +13510,63 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.66, + "tps_mean": 73.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 402.32, + "tps_std": 0.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.5, "tps_std": 0.02, "error": false, "error_type": null, @@ -16853,67 +13577,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 337.62, - "tps_std": 3.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7.2", - "env_base": "rocm", - "env_variant": "7.2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 52.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16926,8 +13594,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 546.89, - "tps_std": 7.03, + "tps_mean": 546.41, + "tps_std": 6.71, "error": false, "error_type": null, "backend": "ROCm", @@ -16940,8 +13608,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16954,8 +13622,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.57, - "tps_std": 0.01, + "tps_mean": 73.52, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -16968,8 +13636,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -16982,8 +13650,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 337.07, - "tps_std": 4.5, + "tps_mean": 402.65, + "tps_std": 1.5, "error": false, "error_type": null, "backend": "ROCm", @@ -16996,8 +13664,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17010,8 +13678,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 52.54, - "tps_std": 0.01, + "tps_mean": 52.58, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -17024,8 +13692,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17038,13 +13706,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1776.02, - "tps_std": 15.96, + "tps_mean": 1779.88, + "tps_std": 16.15, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17052,8 +13720,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17066,13 +13734,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.43, + "tps_mean": 73.26, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17080,8 +13748,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17094,13 +13762,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 596.09, - "tps_std": 0.0, + "tps_mean": 583.3, + "tps_std": 9.18, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17108,8 +13776,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17122,13 +13790,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.2, - "tps_std": 0.0, + "tps_mean": 57.85, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17136,8 +13804,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17150,13 +13818,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1777.68, - "tps_std": 17.27, + "tps_mean": 1785.44, + "tps_std": 15.68, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17164,8 +13832,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17178,13 +13846,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.45, + "tps_mean": 73.22, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17192,8 +13860,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17206,13 +13874,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 546.37, - "tps_std": 0.0, + "tps_mean": 600.15, + "tps_std": 13.61, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17220,8 +13888,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17234,13 +13902,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.26, - "tps_std": 0.0, + "tps_mean": 57.78, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17248,8 +13916,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17262,8 +13930,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1734.91, - "tps_std": 19.75, + "tps_mean": 1742.62, + "tps_std": 12.05, "error": false, "error_type": null, "backend": "ROCm", @@ -17290,7 +13958,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.48, + "tps_mean": 73.51, "tps_std": 0.01, "error": false, "error_type": null, @@ -17318,8 +13986,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 557.68, - "tps_std": 0.88, + "tps_mean": 428.95, + "tps_std": 5.63, "error": false, "error_type": null, "backend": "ROCm", @@ -17346,8 +14014,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.8, - "tps_std": 0.01, + "tps_mean": 57.82, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -17374,8 +14042,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1709.32, - "tps_std": 28.51, + "tps_mean": 1730.96, + "tps_std": 9.7, "error": false, "error_type": null, "backend": "ROCm", @@ -17430,8 +14098,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 541.93, - "tps_std": 1.58, + "tps_mean": 425.86, + "tps_std": 3.58, "error": false, "error_type": null, "backend": "ROCm", @@ -17458,7 +14126,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 57.84, + "tps_mean": 57.9, "tps_std": 0.02, "error": false, "error_type": null, @@ -17476,230 +14144,6 @@ "number": "7938" } }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1710.76, - "tps_std": 45.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 420.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 55.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1732.05, - "tps_std": 23.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 417.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 55.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -17710,13 +14154,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1771.72, - "tps_std": 240.97, + "tps_mean": 1300.97, + "tps_std": 78.99, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17724,8 +14168,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17738,13 +14182,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 7.95, - "tps_std": 0.0, + "tps_mean": 77.58, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17752,8 +14196,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17766,13 +14210,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 294.23, - "tps_std": 0.0, + "tps_mean": 337.8, + "tps_std": 4.4, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17780,8 +14224,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17794,13 +14238,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.42, - "tps_std": 0.0, + "tps_mean": 53.06, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17808,8 +14252,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17822,13 +14266,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1429.1, - "tps_std": 24.1, + "tps_mean": 1397.71, + "tps_std": 70.15, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17836,8 +14280,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17850,13 +14294,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 80.56, - "tps_std": 0.18, + "tps_mean": 80.99, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17864,8 +14308,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17878,13 +14322,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 284.79, - "tps_std": 0.0, + "tps_mean": 416.91, + "tps_std": 7.9, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17892,8 +14336,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17906,13 +14350,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 56.04, - "tps_std": 0.0, + "tps_mean": 60.56, + "tps_std": 0.77, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 20.91, "file_size_gib": 11.27, "name_params_b": 20.91, @@ -17920,8 +14364,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17934,8 +14378,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 549.23, - "tps_std": 0.33, + "tps_mean": 549.58, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "ROCm", @@ -17948,8 +14392,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17962,8 +14406,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.62, - "tps_std": 0.01, + "tps_mean": 50.55, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -17976,8 +14420,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -17990,8 +14434,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 139.39, - "tps_std": 0.75, + "tps_mean": 144.34, + "tps_std": 1.03, "error": false, "error_type": null, "backend": "ROCm", @@ -18004,8 +14448,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18018,7 +14462,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.59, + "tps_mean": 5.6, "tps_std": 0.0, "error": false, "error_type": null, @@ -18032,8 +14476,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18046,8 +14490,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 549.22, - "tps_std": 0.08, + "tps_mean": 548.97, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -18060,8 +14504,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18074,8 +14518,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.67, - "tps_std": 0.01, + "tps_mean": 50.52, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -18088,8 +14532,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18102,8 +14546,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 137.3, - "tps_std": 1.39, + "tps_mean": 143.7, + "tps_std": 0.31, "error": false, "error_type": null, "backend": "ROCm", @@ -18116,8 +14560,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18144,8 +14588,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "a14b960bc", - "number": "7816" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18158,13 +14602,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1595.85, - "tps_std": 4.24, + "tps_mean": 1597.02, + "tps_std": 1.89, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18172,8 +14616,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18186,13 +14630,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.05, - "tps_std": 0.02, + "tps_mean": 51.01, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18200,8 +14644,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18214,13 +14658,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 188.69, - "tps_std": 0.0, + "tps_mean": 193.62, + "tps_std": 1.29, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18228,8 +14672,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18243,12 +14687,12 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 6.93, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18256,8 +14700,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18270,13 +14714,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1599.42, - "tps_std": 4.42, + "tps_mean": 1598.36, + "tps_std": 1.08, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18284,8 +14728,8 @@ "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18298,125 +14742,97 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 187.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 671.91, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.72, + "tps_mean": 51.01, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 191.18, + "tps_std": 2.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1590.62, + "tps_std": 1.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18424,8 +14840,36 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18438,13 +14882,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 147.45, - "tps_std": 0.0, + "tps_mean": 210.19, + "tps_std": 4.57, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18452,8 +14896,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18466,13 +14910,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.59, + "tps_mean": 5.56, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18480,8 +14924,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18494,13 +14938,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 671.59, - "tps_std": 0.34, + "tps_mean": 1590.4, + "tps_std": 2.71, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18508,8 +14952,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18522,13 +14966,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.64, + "tps_mean": 51.21, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18536,8 +14980,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18550,13 +14994,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 145.15, - "tps_std": 0.0, + "tps_mean": 216.78, + "tps_std": 3.22, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18564,8 +15008,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18578,13 +15022,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.59, + "tps_mean": 5.56, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18592,232 +15036,8 @@ "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1578.33, - "tps_std": 5.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 165.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1", - "env_base": "rocm7.1.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1587.41, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 165.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1.1-hblt0", - "env_base": "rocm7.1.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18830,13 +15050,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1261.9, - "tps_std": 215.63, + "tps_mean": 349.95, + "tps_std": 0.3, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18844,8 +15064,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18858,13 +15078,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 6.26, - "tps_std": 0.0, + "tps_mean": 56.0, + "tps_std": 0.22, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18872,8 +15092,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18886,13 +15106,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 104.76, - "tps_std": 0.0, + "tps_mean": 152.53, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18900,8 +15120,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18914,13 +15134,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 4.0, + "tps_mean": 9.29, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18928,8 +15148,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18942,13 +15162,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1126.16, + "tps_mean": 1355.55, "tps_std": 2.34, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18956,8 +15176,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18970,13 +15190,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 55.42, - "tps_std": 0.08, + "tps_mean": 55.88, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -18984,8 +15204,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -18998,13 +15218,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 162.11, - "tps_std": 0.0, + "tps_mean": 246.2, + "tps_std": 1.24, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -19012,8 +15232,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, { @@ -19026,13 +15246,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.74, + "tps_mean": 8.76, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": 0, + "mmap": null, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -19040,8 +15260,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "9c142e3a2", - "number": "7670" + "hash": "e0c93af2a", + "number": "7938" } }, {