From a7ace8dba7c6914a8cae658cc4c4abf4efb3e4a9 Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Mon, 30 Mar 2026 08:37:15 +0100 Subject: [PATCH] updted benchmarks --- ...-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...ash-BF16-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...sh-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...LM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log | 8 + ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 8 + ...Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 8 + ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...M-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...lash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log | 0 ...-of-00002__rocm-7_2__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 0 ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 0 ...of-00002__rocm6_4_4__fa1__longctx32768.log | 0 ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 0 ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...L-00001-of-00002__rocm7-nightlies__fa1.log | 0 ...02__rocm7-nightlies__fa1__longctx32768.log | 0 ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 0 ...0002__vulkan_amdvlk__fa1__longctx32768.log | 0 ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 0 ...-00002__vulkan_radv__fa1__longctx32768.log | 0 ...-14B-Instruct-2512-BF16__rocm-7_2__fa1.log | 8 + ...2512-BF16__rocm-7_2__fa1__longctx32768.log | 8 + ...struct-2512-BF16__rocm-7_2__hblt0__fa1.log | 8 + ...16__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 8 + ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 8 + ...truct-2512-BF16__rocm6_4_4__hblt0__fa1.log | 8 + ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...struct-2512-BF16__rocm7-nightlies__fa1.log | 8 + ...16__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2512-BF16__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 8 + ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 8 + ...2-BF16__vulkan_radv__fa1__longctx32768.log | 8 + ...Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log | 0 ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 0 ...B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 0 ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 0 ...ano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log | 0 ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 0 ...-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 0 ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 0 ...B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 0 ...XL__rocm7-nightlies__fa1__longctx32768.log | 0 ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 0 ...m7-nightlies__hblt0__fa1__longctx32768.log | 0 ...30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 0 ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 0 ...o-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log | 0 ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 0 ...-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log | 8 + ...-of-00003__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 8 + ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...A3B-BF16-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...nstruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log | 8 + ...D-Q6_K_XL__rocm-7_2__fa1__longctx32768.log | 8 + ...-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log | 8 + ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 8 + ...6_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log | 8 + ...ct-Q4_K_M__rocm-7_2__fa1__longctx32768.log | 8 + ...-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log | 8 + ..._M__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 8 + ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 8 + ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 8 + ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 8 + ..._M__rocm7-nightlies__fa1__longctx32768.log | 8 + ...ct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 8 + ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 8 + ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...mma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log | 8 + ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 8 + ...2b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 8 + ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...2b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 + ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...-it-BF16-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + .../gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log | 8 + ...it-Q3_K_S__rocm-7_2__fa1__longctx32768.log | 8 + ...a-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log | 8 + ..._S__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 8 + ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 8 + ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 8 + ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...a-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log | 8 + ..._S__rocm7-nightlies__fa1__longctx32768.log | 8 + ...it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 8 + ..._K_S__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 8 + ...Q3_K_S__vulkan_radv__fa1__longctx32768.log | 8 + ...0b-mxfp4-00001-of-00003__rocm-7_2__fa1.log | 8 + ...-of-00003__rocm-7_2__fa1__longctx32768.log | 8 + ...4-00001-of-00003__rocm-7_2__hblt0__fa1.log | 8 + ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...4-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm-7_2__fa1.log | 8 + ...20b-mxfp4__rocm-7_2__fa1__longctx32768.log | 8 + ...pt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log | 8 + ...p4__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 8 + ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 8 + ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 8 + ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 8 + ...p4__rocm7-nightlies__fa1__longctx32768.log | 8 + ...20b-mxfp4__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 8 + ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 8 + ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm-7_2__fa1.log | 8 + ...2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log | 8 + ..._0__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 8 + ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 8 + ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 8 + ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 8 + ..._0__rocm7-nightlies__fa1__longctx32768.log | 8 + ...2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 8 + ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 8 + .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 8 + ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 8 + benchmark/results/04-02-2026/system_info.json | 1 + ...-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log | 14 +- ...-of-00002__rocm-7_2__fa1__longctx32768.log | 14 +- ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 14 +- ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 14 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 14 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 14 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 28 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 27 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...ash-BF16-00001-of-00002__rocm-7_2__fa1.log | 14 +- ...-of-00002__rocm-7_2__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 14 +- ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...sh-BF16-00001-of-00002__rocm6_4_4__fa1.log | 14 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 14 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 14 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...LM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log | 14 +- ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 14 +- ...Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 14 +- ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...M-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log | 14 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 14 +- ...lash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 14 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 14 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 27 +- ...4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log | 10 +- ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...-14B-Instruct-2512-BF16__rocm-7_2__fa1.log | 14 +- ...2512-BF16__rocm-7_2__fa1__longctx32768.log | 14 +- ...struct-2512-BF16__rocm-7_2__hblt0__fa1.log | 14 +- ...16__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...14B-Instruct-2512-BF16__rocm6_4_4__fa1.log | 14 +- ...512-BF16__rocm6_4_4__fa1__longctx32768.log | 14 +- ...truct-2512-BF16__rocm6_4_4__hblt0__fa1.log | 14 +- ...6__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...struct-2512-BF16__rocm7-nightlies__fa1.log | 14 +- ...16__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...2512-BF16__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...Instruct-2512-BF16__vulkan_amdvlk__fa1.log | 27 +- ...BF16__vulkan_amdvlk__fa1__longctx32768.log | 27 +- ...B-Instruct-2512-BF16__vulkan_radv__fa1.log | 10 +- ...2-BF16__vulkan_radv__fa1__longctx32768.log | 10 +- ...-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log | 8 + ...-of-00003__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 8 + ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log | 14 +- ...-of-00003__rocm-7_2__fa1__longctx32768.log | 14 +- ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 14 +- ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 14 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 14 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...L-00001-of-00003__rocm7-nightlies__fa1.log | 14 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 10 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 10 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 10 +- ...A3B-BF16-00001-of-00002__rocm-7_2__fa1.log | 14 +- ...-of-00002__rocm-7_2__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 14 +- ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 14 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 14 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 14 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...nstruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log | 14 +- ...D-Q6_K_XL__rocm-7_2__fa1__longctx32768.log | 14 +- ...-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log | 14 +- ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...struct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log | 14 +- ...-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log | 14 +- ...2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log | 14 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log | 14 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...D-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 10 +- ...6_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log | 14 +- ...ct-Q4_K_M__rocm-7_2__fa1__longctx32768.log | 14 +- ...-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log | 14 +- ..._M__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...0B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log | 14 +- ...t-Q4_K_M__rocm6_4_4__fa1__longctx32768.log | 14 +- ...Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log | 14 +- ...M__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...-Instruct-Q4_K_M__rocm7-nightlies__fa1.log | 14 +- ..._M__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...ct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log | 10 +- ..._K_M__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log | 10 +- ...Q4_K_M__vulkan_radv__fa1__longctx32768.log | 10 +- ...-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log | 8 + ...-of-00003__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 8 + ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log | 14 +- ...-of-00002__rocm-7_2__fa1__longctx32768.log | 14 +- ...L-00001-of-00002__rocm-7_2__hblt0__fa1.log | 14 +- ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 14 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 14 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...L-00001-of-00002__rocm7-nightlies__fa1.log | 14 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 10 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- ...-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log | 8 + ...-of-00003__rocm-7_2__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm-7_2__hblt0__fa1.log | 8 + ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 8 + ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...L-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...A3B-BF16-00001-of-00002__rocm-7_2__fa1.log | 8 + ...-of-00002__rocm-7_2__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 8 + ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...3B-BF16-00001-of-00002__rocm6_4_4__fa1.log | 8 + ...of-00002__rocm6_4_4__fa1__longctx32768.log | 8 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 8 + ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...6-00001-of-00002__rocm7-nightlies__fa1.log | 8 + ...02__rocm7-nightlies__fa1__longctx32768.log | 8 + ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ...0002__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 + ...-00002__vulkan_radv__fa1__longctx32768.log | 8 + ...n3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log | 8 + ...D-Q4_K_XL__rocm-7_2__fa1__longctx32768.log | 8 + ...B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log | 8 + ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 8 + ...3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log | 8 + ...-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log | 8 + ...-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log | 8 + ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 8 + ...B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log | 8 + ...XL__rocm7-nightlies__fa1__longctx32768.log | 8 + ...D-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log | 8 + ...m7-nightlies__hblt0__fa1__longctx32768.log | 8 + ...35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log | 8 + ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log | 8 + ...4_K_XL__vulkan_radv__fa1__longctx32768.log | 8 + ...mma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log | 14 +- ...D-Q8_K_XL__rocm-7_2__fa1__longctx32768.log | 14 +- ...2b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log | 14 +- ...XL__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log | 14 +- ...-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log | 14 +- ...b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log | 14 +- ...L__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...2b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log | 14 +- ...XL__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...D-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 10 +- ...K_XL__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 10 +- ...8_K_XL__vulkan_radv__fa1__longctx32768.log | 10 +- ...-it-BF16-00001-of-00002__rocm-7_2__fa1.log | 14 +- ...-of-00002__rocm-7_2__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm-7_2__hblt0__fa1.log | 14 +- ...02__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...it-BF16-00001-of-00002__rocm6_4_4__fa1.log | 14 +- ...of-00002__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 14 +- ...2__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...6-00001-of-00002__rocm7-nightlies__fa1.log | 14 +- ...02__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00002__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 27 +- ...0002__vulkan_amdvlk__fa1__longctx32768.log | 27 +- ...-BF16-00001-of-00002__vulkan_radv__fa1.log | 10 +- ...-00002__vulkan_radv__fa1__longctx32768.log | 10 +- .../gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log | 14 +- ...it-Q3_K_S__rocm-7_2__fa1__longctx32768.log | 14 +- ...a-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log | 14 +- ..._S__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- .../gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log | 14 +- ...t-Q3_K_S__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log | 14 +- ...S__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...a-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log | 14 +- ..._S__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 10 +- ..._K_S__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 10 +- ...Q3_K_S__vulkan_radv__fa1__longctx32768.log | 10 +- ...0b-mxfp4-00001-of-00003__rocm-7_2__fa1.log | 14 +- ...-of-00003__rocm-7_2__fa1__longctx32768.log | 14 +- ...4-00001-of-00003__rocm-7_2__hblt0__fa1.log | 14 +- ...03__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- ...b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log | 14 +- ...of-00003__rocm6_4_4__fa1__longctx32768.log | 14 +- ...-00001-of-00003__rocm6_4_4__hblt0__fa1.log | 14 +- ...3__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...4-00001-of-00003__rocm7-nightlies__fa1.log | 14 +- ...03__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...-of-00003__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- ...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 10 +- ...0003__vulkan_amdvlk__fa1__longctx32768.log | 10 +- ...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 10 +- ...-00003__vulkan_radv__fa1__longctx32768.log | 10 +- .../gpt-oss-20b-mxfp4__rocm-7_2__fa1.log | 14 +- ...20b-mxfp4__rocm-7_2__fa1__longctx32768.log | 14 +- ...pt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log | 14 +- ...p4__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- .../gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log | 14 +- ...0b-mxfp4__rocm6_4_4__fa1__longctx32768.log | 14 +- ...t-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log | 14 +- ...4__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- ...pt-oss-20b-mxfp4__rocm7-nightlies__fa1.log | 14 +- ...p4__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...20b-mxfp4__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- .../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 10 +- ...xfp4__vulkan_amdvlk__fa1__longctx32768.log | 10 +- .../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 10 +- ...-mxfp4__vulkan_radv__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__rocm-7_2__fa1.log | 14 +- ...2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log | 14 +- .../llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log | 14 +- ..._0__rocm-7_2__hblt0__fa1__longctx32768.log | 14 +- .../llama-2-7b.Q4_0__rocm6_4_4__fa1.log | 14 +- ...-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log | 14 +- ...llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log | 14 +- ...0__rocm6_4_4__hblt0__fa1__longctx32768.log | 14 +- .../llama-2-7b.Q4_0__rocm7-nightlies__fa1.log | 14 +- ..._0__rocm7-nightlies__fa1__longctx32768.log | 14 +- ...2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log | 14 +- ...m7-nightlies__hblt0__fa1__longctx32768.log | 14 +- .../llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log | 10 +- ...Q4_0__vulkan_amdvlk__fa1__longctx32768.log | 10 +- .../llama-2-7b.Q4_0__vulkan_radv__fa1.log | 10 +- ...b.Q4_0__vulkan_radv__fa1__longctx32768.log | 10 +- benchmark/results/system_info.json | 2 +- docs/results.json | 28361 +++++++++------- 595 files changed, 19653 insertions(+), 14511 deletions(-) create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log (100%) rename benchmark/results/{ => 04-02-2026}/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log (100%) create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log create mode 100644 benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/04-02-2026/system_info.json create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log create mode 100644 benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..07bbca3 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..d3dcb44 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..dc35095 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..914ebcc --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..b4213c1 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..86f7ce7 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..2c8897a --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..77126a6 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..8e9d026 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..b8f691a --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..baf867e --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..108f8a6 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3836dd7 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..b2e6658 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 4.94 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..aeb25b4 --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..31ef62e --- /dev/null +++ b/benchmark/results/04-02-2026/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..455d013 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..71489ab --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..e9729d2 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cd27b5f --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..d58dab9 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..aa07720 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..f69d3f6 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6b8851c --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..5c3d51b --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..718fbd4 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..45c08f7 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9bd643e --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..f8630c5 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a025866 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..f2c87cb --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..ad53f7f --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log new file mode 100644 index 0000000..01e9227 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..e85162c --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..da85676 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..40cfc38 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a68e922 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..11651a6 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..d44f270 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3627139 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..35840f2 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..7c3adf6 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..3d7dec0 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8bd5686 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5754f88 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..58b0878 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..8c3838a --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..f135b33 --- /dev/null +++ b/benchmark/results/04-02-2026/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log new file mode 100644 index 0000000..df4a892 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..f184a50 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..32590b1 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..505df24 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log new file mode 100644 index 0000000..8118e71 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..f3f55d3 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..661b27c --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6cd94b5 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1ef3f29 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5b40f9e --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..545b269 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5974e95 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3c69e17 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..f5ad8c4 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log new file mode 100644 index 0000000..e04be5a --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..d5938e7 --- /dev/null +++ b/benchmark/results/04-02-2026/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 100% rename from benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/04-02-2026/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log new file mode 100644 index 0000000..682b041 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..9057c7d --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..366d2a3 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.10 ± 8.37 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.04 ± 0.06 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cb8f1d5 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.53 ± 0.41 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.50 ± 0.75 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a353c9d --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 205.05 ± 3.62 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 14.98 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..e89ce17 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.11 ± 0.62 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.63 ± 0.07 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..50fa18d --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 203.41 ± 3.52 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.00 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e681c9d --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.19 ± 0.64 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.58 ± 0.10 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..803ea12 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.04 ± 4.11 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.19 ± 0.09 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..7bd5d26 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.42 ± 0.37 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.94 ± 0.42 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..291f2c8 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 197.48 ± 10.80 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.20 ± 0.08 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a79a619 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.60 ± 0.36 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.81 ± 0.61 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..5264d6e --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 119.82 ± 3.30 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 17.75 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..8ecec15 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 27.41 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.42 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..67b83cc --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 133.28 ± 1.45 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 15.98 ± 0.25 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..7945690 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 30.79 ± 0.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.50 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..f8f2cd5 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 489.11 ± 2.88 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.18 ± 0.16 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..c30a4c0 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 214.97 ± 1.13 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.57 ± 1.40 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..3f8e597 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.32 ± 2.55 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.23 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..23b0efd --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 207.64 ± 0.55 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.84 ± 0.97 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..4bf79a0 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 576.03 ± 3.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 26.12 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..d1e5d2a --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 249.94 ± 1.13 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.18 ± 0.33 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3d12444 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 569.42 ± 8.52 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.07 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..fedbfed --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 250.24 ± 0.88 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.77 ± 0.98 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..a6ca809 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 512.10 ± 4.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.27 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c1c2630 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.18 ± 0.74 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.71 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..7e6c7c6 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 493.72 ± 3.45 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.32 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..aec4b8d --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 209.02 ± 0.16 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.67 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..4f9d7a2 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 168.95 ± 7.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 10.62 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..69c594e --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 75.04 ± 0.02 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.68 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..e9a9b4f --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 351.97 ± 2.56 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 9.42 ± 0.21 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..e5c1a75 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 127.67 ± 0.45 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.31 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log new file mode 100644 index 0000000..0dd1297 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 815.37 ± 5.82 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.54 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..cef8f0a --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.56 ± 4.38 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.30 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..c248812 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 811.39 ± 6.56 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d87028a --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 171.54 ± 4.45 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.29 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..73135a6 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1078.99 ± 11.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 56.45 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..0fa6c28 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.17 ± 8.71 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.94 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..e4c7e21 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1080.52 ± 10.73 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 57.49 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..bb64b06 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.42 ± 7.66 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.96 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..75d0678 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1056.78 ± 36.08 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.15 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..fef8ac0 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.40 ± 4.11 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..e795bb5 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1039.16 ± 53.94 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.16 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cacca70 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 174.67 ± 4.22 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..3e8cb37 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 823.08 ± 48.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 66.14 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..7e07c93 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 112.99 ± 0.13 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 27.35 ± 0.07 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..9fd3aa4 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1064.73 ± 70.49 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 68.93 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..f189e87 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.30 ± 3.42 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 34.18 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log new file mode 100644 index 0000000..33abf49 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1209.23 ± 7.46 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..1e0bd42 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 177.01 ± 5.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.40 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..3c417e5 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.91 ± 9.78 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d871650 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.56 ± 3.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log new file mode 100644 index 0000000..22e9a0f --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1263.87 ± 7.23 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.78 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..0afdf48 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.20 ± 8.55 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.48 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..5e79f7a --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1260.69 ± 6.89 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.94 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a96e13b --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.34 ± 7.55 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.52 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..3b0cc22 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1241.85 ± 15.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.57 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5027639 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.98 ± 4.28 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.43 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..70fc750 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1229.55 ± 20.23 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.45 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..0b7f117 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.47 ± 4.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.44 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..f27f9db --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 846.24 ± 47.60 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 86.32 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..c8693bb --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 114.18 ± 0.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 30.07 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log new file mode 100644 index 0000000..4e7288a --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1005.90 ± 6.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 79.55 ± 6.96 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..15abf33 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 153.83 ± 3.76 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.44 ± 0.05 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..f5ae063 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 193.67 ± 2.12 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.98 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..d5132fd --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.45 ± 0.19 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.87 ± 3.54 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..d64554b --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 263.91 ± 3.82 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.97 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..206af26 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 296.64 ± 0.50 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.44 ± 4.28 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a915bc9 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.54 ± 4.38 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.45 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..99b98d0 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 449.68 ± 1.06 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.10 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3d76bbf --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.83 ± 4.39 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.75 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c1c013e --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 448.82 ± 1.02 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.07 ± 0.35 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..b89bb7b --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 282.60 ± 2.04 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.89 ± 0.20 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..5d5c0db --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 238.71 ± 0.62 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.93 ± 4.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..b8878a5 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 590.03 ± 3.05 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.73 ± 0.52 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..be1cd00 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 413.78 ± 0.61 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.54 ± 3.30 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..35f7add --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 426.39 ± 3.26 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 31.84 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9ed3aa4 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 228.41 ± 1.50 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 22.47 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..19a2fa7 --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 509.22 ± 20.34 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 29.92 ± 0.05 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..faa295c --- /dev/null +++ b/benchmark/results/04-02-2026/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 370.94 ± 32.12 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 26.00 ± 0.20 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log new file mode 100644 index 0000000..a9d4fa7 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.33 ± 0.27 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..ee21601 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 232.79 ± 5.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..9ed3a0f --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.44 ± 0.31 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..45ba7e6 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 229.19 ± 6.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..86e7334 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 936.69 ± 1.33 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.23 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..9364a43 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 258.34 ± 1.81 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.63 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..2eda1f4 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 935.37 ± 1.09 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.20 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..5f3c340 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.44 ± 5.27 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.62 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..fb87a4a --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 943.63 ± 1.62 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..14934cf --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.59 ± 26.74 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..ba60108 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 942.52 ± 1.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..878aaa3 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 376.68 ± 9.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..55932ee --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 125.50 ± 0.06 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.45 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..41a43d8 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 111.11 ± 0.04 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.40 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..edac157 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 687.05 ± 0.75 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.14 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..24d4dab --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 376.92 ± 18.46 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.72 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..1ff6f83 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 463.92 ± 1.19 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..b36f842 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.32 ± 3.30 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.68 ± 0.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..c643303 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 528.00 ± 0.44 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..cc34693 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 201.67 ± 1.78 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.74 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..00bcee5 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.08 ± 0.85 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..12a07a9 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.44 ± 2.25 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..97f96ee --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.48 ± 0.88 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..93404ec --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 220.03 ± 0.98 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..d8498c8 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 549.57 ± 2.42 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..08d310a --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.98 ± 0.94 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..59af47c --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 529.01 ± 0.98 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..359ca8d --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 204.92 ± 2.92 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..db92211 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 9.32 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.87 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..7a34429 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 9.20 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.60 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..858bf96 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 123.07 ± 0.27 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.92 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..767256d --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 82.96 ± 0.72 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.66 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log new file mode 100644 index 0000000..c50aeb9 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2870.77 ± 12.89 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..861d2dd --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1639.03 ± 15.14 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..7ec3e36 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2807.93 ± 16.33 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.66 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..aba12ed --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1262.15 ± 24.34 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.54 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log new file mode 100644 index 0000000..6b110c7 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2891.85 ± 2.60 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.18 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..71df791 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1600.62 ± 30.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.21 ± 0.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..98f27b4 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2893.75 ± 3.92 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.15 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d1578cd --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1419.18 ± 40.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.24 ± 0.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..fe1a281 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2805.65 ± 13.25 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.35 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ef479c4 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1215.66 ± 10.33 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.02 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..da4aac2 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2800.57 ± 47.75 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.47 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e240976 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1214.20 ± 13.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.03 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..6ebe735 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 657.19 ± 0.41 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 86.55 ± 0.10 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..404a158 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 493.70 ± 0.98 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 58.57 ± 0.11 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log new file mode 100644 index 0000000..0537089 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 1977.82 ± 204.87 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 91.09 ± 3.96 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..e118216 --- /dev/null +++ b/benchmark/results/04-02-2026/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 1149.92 ± 30.21 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 67.86 ± 0.22 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log new file mode 100644 index 0000000..f098ce7 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.09 ± 1.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.77 ± 0.73 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..0d8b56a --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.33 ± 0.32 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..d6ccfa5 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.23 ± 1.39 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.06 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9bfd24d --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.30 ± 0.10 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.08 ± 4.33 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..a05b582 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 681.58 ± 4.94 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 50.85 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..5948f34 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.76 ± 35.92 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.13 ± 0.05 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..abd20b5 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 678.97 ± 4.29 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.88 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e3aae6c --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 400.64 ± 35.51 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 37.97 ± 3.34 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..5365be2 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 649.28 ± 39.54 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.00 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c611dc9 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 259.94 ± 5.81 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.30 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..83fdee3 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 666.65 ± 12.50 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.05 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..420ec5d --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.35 ± 6.47 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.36 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..4e6b4ce --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 643.04 ± 39.69 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 54.00 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..394417c --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 195.45 ± 2.65 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.02 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..40087c2 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 597.02 ± 9.82 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 57.38 ± 0.04 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..e8acd38 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 278.37 ± 7.19 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 42.78 ± 0.09 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log new file mode 100644 index 0000000..78c56b0 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 547.85 ± 6.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..e4ebb3b --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.32 ± 0.67 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.50 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..bb60d45 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.41 ± 6.71 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..f06a549 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.65 ± 1.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.58 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log new file mode 100644 index 0000000..77a4b05 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1779.88 ± 16.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.26 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..7323d53 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 583.30 ± 9.18 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.85 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..537f920 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1785.44 ± 15.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.22 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9599307 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 600.15 ± 13.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.78 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..adb683c --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1742.62 ± 12.05 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.51 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..46f28e1 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 428.95 ± 5.63 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.82 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..c06bd1f --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1730.96 ± 9.70 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e0f62d9 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 425.86 ± 3.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.90 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..fd7fc59 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1300.97 ± 78.99 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 77.58 ± 0.03 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..92c9d52 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 337.80 ± 4.40 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 53.06 ± 0.10 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log new file mode 100644 index 0000000..5534eab --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1397.71 ± 70.15 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 80.99 ± 0.06 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..9f1beb7 --- /dev/null +++ b/benchmark/results/04-02-2026/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 416.91 ± 7.90 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 60.56 ± 0.77 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log new file mode 100644 index 0000000..4aeb30d --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.58 ± 0.25 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.55 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..a671c82 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 144.34 ± 1.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..8caec46 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 548.97 ± 0.14 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.52 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..259fde9 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 143.70 ± 0.31 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log new file mode 100644 index 0000000..d34295f --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1597.02 ± 1.89 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..0f41bbb --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.62 ± 1.29 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.93 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..ed7cd83 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1598.36 ± 1.08 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.02 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e9332b6 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.18 ± 2.26 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.94 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..dd9ab75 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.62 ± 1.92 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.17 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..8634ecc --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 210.19 ± 4.57 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..08f165c --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.40 ± 2.71 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.21 ± 0.01 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..355d6a7 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.78 ± 3.22 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..b7dc396 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 349.95 ± 0.30 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 56.00 ± 0.22 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..34caa8e --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.53 ± 0.11 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 9.29 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log new file mode 100644 index 0000000..e89bb38 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 1355.55 ± 2.34 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 55.88 ± 0.13 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..2ca9650 --- /dev/null +++ b/benchmark/results/04-02-2026/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 246.20 ± 1.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.76 ± 0.00 | + +build: e0c93af2a (7938) diff --git a/benchmark/results/04-02-2026/system_info.json b/benchmark/results/04-02-2026/system_info.json new file mode 100644 index 0000000..b7fac37 --- /dev/null +++ b/benchmark/results/04-02-2026/system_info.json @@ -0,0 +1 @@ +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.18.5-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", "timestamp": "04 Feb 2026"} diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log index 07bbca3..35551cf 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 72.93 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.95 ± 0.06 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.96 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log index d3dcb44..4f22b35 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.04 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.07 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log index dc35095..f8dbba3 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.58 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.30 ± 0.11 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.98 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log index 914ebcc..8d95301 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.48 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.08 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index b4213c1..1e98d22 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.51 ± 0.07 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.72 ± 0.10 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.37 ± 0.13 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.76 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 86f7ce7..8999b32 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.57 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.56 ± 0.07 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.09 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 2c8897a..fe9db4c 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 79.24 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.67 ± 0.11 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.63 ± 0.08 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 77126a6..a0c8e97 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.53 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.15 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.58 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index 8e9d026..29499ed 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 78.28 ± 0.06 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.98 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 80.49 ± 0.14 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index b8f691a..dedaf41 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.18 ± 0.03 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.15 ± 0.06 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index baf867e..2223087 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 80.59 ± 0.10 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.99 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.97 ± 0.09 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index 108f8a6..a4d9675 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 17.27 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.07 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 21.17 ± 0.28 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.07 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 3836dd7..4540fd9 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,24 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 17.65 ± 0.01 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f8003c3bc25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8003c3bfeb] +/lib64/libggml-base.so.0(+0x16669) [0x7f8003c4e669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f80033b2bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f800339cd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f80033b2ea8] +/lib64/libggml-vulkan.so.0(+0x14f76) [0x7f8003cf6f76] +/lib64/libggml-vulkan.so.0(+0x13597f) [0x7f8003e1797f] +/lib64/libggml-vulkan.so.0(+0x136411) [0x7f8003e18411] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f8003c574d3] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f800756ce70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f800756f445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f8007575aaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f800757742e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f80030835b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8003083668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index b2e6658..8d7f3ff 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 4.94 ± 0.01 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 1.69 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f34c7070c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f34c7070feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f34c7083669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f34c67e7bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f34c67d1d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f34c67e7ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f34c712db68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f34c7213fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f34c708c092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f34ca9a1e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f34ca9a4445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f34ca9aaaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f34ca9ac42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f34c64b85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f34c64b8668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index aeb25b4..1c52acf 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp512 | 54.76 ± 11.46 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg128 | 3.00 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp512 | 47.20 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 31ef62e..eb50e64 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 7.15 ± 0.02 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | tg32 @ d32768 | 2.27 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 7.39 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.60 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log index 455d013..8bc6dfa 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 454.95 ± 1.90 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 22.26 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 435.72 ± 2.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.38 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log index 71489ab..ec45ef3 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 94.79 ± 0.56 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.48 ± 0.09 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.93 ± 0.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.33 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log index e9729d2..6771408 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.21 ± 1.79 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.83 ± 1.74 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.37 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log index cd27b5f..28c5c04 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 93.83 ± 0.40 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.55 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.49 ± 0.23 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.33 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log index d58dab9..f8af5b7 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 407.15 ± 2.05 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 21.51 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.19 ± 1.92 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 19.36 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index aa07720..431595d 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 101.09 ± 0.37 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.23 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.74 ± 0.05 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.06 ± 0.18 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index f69d3f6..5c65164 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 414.23 ± 2.09 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.11 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 393.15 ± 2.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.11 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 6b8851c..adc0015 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 100.06 ± 0.38 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 15.97 ± 0.45 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 94.55 ± 0.30 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.18 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log index 5c3d51b..3da36d8 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 489.62 ± 3.63 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.40 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 476.23 ± 2.21 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.36 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 718fbd4..8a819b6 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.48 ± 1.13 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.50 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 86.79 ± 0.15 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.32 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index 45c08f7..d0c28f4 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 425.86 ± 2.29 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 23.41 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 401.13 ± 5.52 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.35 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index 9bd643e..3da0eab 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.06 ± 0.08 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 16.51 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 85.92 ± 0.13 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.30 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index f8630c5..1400856 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 106.42 ± 0.08 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 10.87 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 114.88 ± 0.15 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index a025866..0c10034 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.09 ± 0.00 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.28 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 10.15 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.05 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log index f2c87cb..2c91e29 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 333.10 ± 6.48 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 9.51 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 342.11 ± 5.47 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 9.48 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index ad53f7f..d0eb79f 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 78.99 ± 0.25 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.13 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.93 ± 0.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log index 01e9227..f49590b 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.34 ± 1.32 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.94 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 905.79 ± 2.84 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.96 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log index e85162c..b2ff1c9 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 90.22 ± 4.88 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.35 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.97 ± 0.19 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.58 ± 0.07 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log index da85676..fcbe701 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 398.87 ± 1.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.09 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 893.68 ± 28.19 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.20 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log index 40cfc38..0835b60 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 92.13 ± 0.15 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.56 ± 1.34 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.83 ± 0.16 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.62 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log index a68e922..5f26656 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 947.86 ± 2.03 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 33.77 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 903.39 ± 1.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 31.04 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 11651a6..8dc2481 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.85 ± 1.04 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.89 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.84 ± 0.23 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.33 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index d44f270..ada6b79 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 952.84 ± 2.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 35.23 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 906.25 ± 1.64 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.57 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index 3627139..41a8790 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 84.01 ± 0.58 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 21.97 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.07 ± 0.20 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.35 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log index 35840f2..37c4d87 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 983.72 ± 3.21 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.20 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 902.85 ± 59.94 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.16 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 7c3adf6..d4a1f68 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 80.32 ± 1.28 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.31 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.75 ± 0.31 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.59 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log index 3d7dec0..fd05cd0 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | pp512 | 955.10 ± 4.53 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | tg128 | 36.16 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 906.42 ± 2.57 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.18 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index 8bd5686..d6e72bd 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 81.34 ± 1.80 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 22.32 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 89.99 ± 0.26 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.60 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 5754f88..fcd1fbe 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 368.78 ± 0.17 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.80 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 499.98 ± 1.29 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 39.01 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 58b0878..e9e723c 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 6.35 ± 0.00 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 18.75 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7fa3d16b4c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa3d16b4feb] +/lib64/libggml-base.so.0(+0x16669) [0x7fa3d16c7669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa3d0e2bbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa3d0e15d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fa3d0e2bea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7fa3d1771b68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7fa3d1857fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa3d16d0092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa3d4fe5e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa3d4fe8445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa3d4feeaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fa3d4ff042e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x419a10] +/lib64/libc.so.6(+0x35b5) [0x7fa3d0afc5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa3d0afc668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log index 8c3838a..c84fe4a 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp512 | 877.18 ± 8.15 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg128 | 40.07 ± 0.78 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp512 | 853.46 ± 6.70 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg128 | 40.38 ± 1.22 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index f135b33..3b8a27e 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 90.27 ± 0.42 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | tg32 @ d32768 | 23.07 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 133.27 ± 0.03 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.17 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log index df4a892..16a5422 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 800.17 ± 1.72 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 673.71 ± 1.42 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.35 ± 0.17 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log index f184a50..edb719b 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 169.18 ± 1.16 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.30 ± 0.21 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log index 32590b1..fe8ad78 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.22 ± 2.21 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 748.37 ± 4.67 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log index 505df24..ff41820 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.11 ± 0.81 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.11 ± 1.59 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log index 8118e71..aaf2ed8 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 764.18 ± 1.66 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 706.16 ± 3.76 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log index f3f55d3..f03361f 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 166.22 ± 1.20 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 1.47 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log index 661b27c..7a1e96f 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 766.68 ± 1.07 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.48 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 710.61 ± 4.31 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.39 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log index 6cd94b5..4ae9022 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 164.84 ± 1.99 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 161.22 ± 2.64 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.06 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log index 1ef3f29..4bfad54 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 990.88 ± 3.15 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.50 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 881.45 ± 3.04 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log index 5b40f9e..e15c3d6 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 172.42 ± 3.61 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.50 ± 0.70 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log index 545b269..710a6a1 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 799.71 ± 2.09 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 735.98 ± 5.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.44 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log index 5974e95..8ee4f6b 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.19 ± 1.69 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.10 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 205.29 ± 0.84 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index 3c69e17..47e06e4 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 19.70 ± 0.00 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 8.24 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f9947881c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f9947881feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f9947894669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f9946ff8bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f9946fe2d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f9946ff8ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f994793eb68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f9947a24fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f994789d092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f994b1b2e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f994b1b5445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f994b1bbaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f994b1bd42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x419a10] +/lib64/libc.so.6(+0x35b5) [0x7f9946cc95b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9946cc9668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log index f5ad8c4..59b5b4b 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 16.69 ± 0.01 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.41 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f4bc4c80c25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4bc4c80feb] +/lib64/libggml-base.so.0(+0x16669) [0x7f4bc4c93669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f4bc43f7bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f4bc43e1d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f4bc43f7ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f4bc4d3db68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f4bc4e23fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f4bc4c9c092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f4bc85b1e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f4bc85b4445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f4bc85baaaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f4bc85bc42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f4bc40c85b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4bc40c8668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index e04be5a..8311fc9 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp512 | 222.01 ± 0.94 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg128 | 7.59 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 166.51 ± 0.62 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.94 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log index d5938e7..645c378 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 76.47 ± 0.38 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.39 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 72.65 ± 0.24 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.70 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log new file mode 100644 index 0000000..3e633b5 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 259.71 ± 1.51 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.74 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..9cd5891 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 302.29 ± 0.44 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.58 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..2d8a89d --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 263.95 ± 1.57 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.02 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..a6f51a2 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 305.52 ± 0.18 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.59 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..c181684 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 260.98 ± 1.56 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.60 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..6d79881 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.69 ± 0.27 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.38 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..6c0fee0 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 262.30 ± 1.42 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.86 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ca1525c --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 324.11 ± 0.41 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.45 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..2b76326 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 228.50 ± 1.06 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..39e5779 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.43 ± 0.38 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..c5c9ddb --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 236.84 ± 1.32 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.04 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..db1e7af --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 290.04 ± 0.11 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.76 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ecc3dbc --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 140.84 ± 0.27 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 13.97 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..fe06b04 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.21 ± 0.16 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.21 ± 0.32 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..cd213bb --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp512 | 190.66 ± 7.11 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg128 | 14.41 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..4618d0c --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 191.12 ± 0.46 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.85 ± 0.26 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log index 682b041..bebfbdf 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 202.36 ± 3.50 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.80 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.62 ± 1.88 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.64 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log index 9057c7d..69580a4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.36 ± 0.87 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.65 ± 0.30 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.93 ± 0.03 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.41 ± 0.37 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log index 366d2a3..3945af1 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.10 ± 8.37 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.04 ± 0.06 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.32 ± 1.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.80 ± 0.11 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log index cb8f1d5..51d1402 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.53 ± 0.41 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.50 ± 0.75 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.97 ± 0.14 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.63 ± 0.11 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index a353c9d..87dbc2a 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 205.05 ± 3.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 14.98 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.89 ± 1.84 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 12.96 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index e89ce17..18f0388 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.11 ± 0.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.63 ± 0.07 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.32 ± 0.08 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.22 ± 0.11 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 50fa18d..2a8c48b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 203.41 ± 3.52 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 15.00 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.04 ± 1.90 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.11 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log index e681c9d..5d89766 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 51.19 ± 0.64 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.58 ± 0.10 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.27 ± 0.05 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.02 ± 0.31 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log index 803ea12..e3df124 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 200.04 ± 4.11 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.19 ± 0.09 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.46 ± 1.16 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.31 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index 7bd5d26..7c95bbf 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.42 ± 0.37 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.94 ± 0.42 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.93 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.34 ± 0.41 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log index 291f2c8..b2bd010 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 197.48 ± 10.80 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 16.20 ± 0.08 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.00 ± 1.99 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.33 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log index a79a619..61b7bff 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 41.60 ± 0.36 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 10.81 ± 0.61 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.33 ± 0.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.46 ± 0.34 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 5264d6e..865311b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 119.82 ± 3.30 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 17.75 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 106.76 ± 0.77 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 16.49 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 8ecec15..fdee904 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 27.41 ± 0.01 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.42 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 26.54 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.57 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index 67b83cc..9130533 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp512 | 133.28 ± 1.45 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg128 | 15.98 ± 0.25 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 158.81 ± 2.40 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.16 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log index 7945690..b16d990 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 30.79 ± 0.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | tg32 @ d32768 | 6.50 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 28.25 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.49 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log index f8f2cd5..cfd6de4 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 489.11 ± 2.88 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.18 ± 0.16 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 474.42 ± 2.29 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.36 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log index c30a4c0..f391dea 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 214.97 ± 1.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.57 ± 1.40 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 189.58 ± 0.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.53 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log index 3f8e597..3b26f36 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.32 ± 2.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.23 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 479.78 ± 2.72 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.37 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log index 23b0efd..820d3b1 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 207.64 ± 0.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.84 ± 0.97 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 180.26 ± 0.37 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.54 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index 4bf79a0..b60bde6 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 576.03 ± 3.01 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 26.12 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 556.96 ± 2.81 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.05 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index d1e5d2a..d9f8066 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 249.94 ± 1.13 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.18 ± 0.33 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 238.10 ± 0.11 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.26 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 3d12444..6dbd18b 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 569.42 ± 8.52 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.07 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 556.80 ± 4.26 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.16 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index fedbfed..d9525bd 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 250.24 ± 0.88 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.77 ± 0.98 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 237.85 ± 0.37 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.46 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log index a6ca809..8f2facd 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 512.10 ± 4.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.27 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 503.77 ± 3.42 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.54 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index c1c2630..e21cfdb 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.18 ± 0.74 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.71 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.59 ± 0.07 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.25 ± 1.21 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index 7e6c7c6..e018831 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 493.72 ± 3.45 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.32 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 482.99 ± 0.86 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.49 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index aec4b8d..dfd3f50 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 209.02 ± 0.16 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.67 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 175.78 ± 0.14 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.94 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 4f9d7a2..05e1c62 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 168.95 ± 7.69 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 10.62 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 171.39 ± 0.81 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.48 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 69c594e..05e07dc 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 75.04 ± 0.02 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.68 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.62 ± 0.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.49 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index e9a9b4f..7bc1638 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 351.97 ± 2.56 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 9.42 ± 0.21 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 369.06 ± 2.58 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.38 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index e5c1a75..6636734 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 127.67 ± 0.45 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.31 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 140.81 ± 0.52 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.24 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log index 0dd1297..8ac5001 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 815.37 ± 5.82 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.54 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 994.33 ± 11.86 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 55.87 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log index cef8f0a..1d4eadc 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 170.56 ± 4.38 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.30 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.15 ± 0.11 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.88 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log index c248812..81f42e5 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 811.39 ± 6.56 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 994.22 ± 10.06 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 55.84 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log index d87028a..fc65e7c 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 171.54 ± 4.45 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.29 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.93 ± 0.21 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.90 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index 73135a6..8d03802 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1078.99 ± 11.01 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 56.45 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1009.37 ± 9.64 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 53.13 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log index 0fa6c28..e60bbfd 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.17 ± 8.71 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.94 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 285.49 ± 0.18 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.53 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index e4c7e21..9ab5751 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1080.52 ± 10.73 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 57.49 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1012.69 ± 9.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 54.94 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index bb64b06..1d13bc6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.42 ± 7.66 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 30.96 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 285.25 ± 0.16 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.50 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log index 75d0678..ae4042e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1056.78 ± 36.08 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.15 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1023.24 ± 11.08 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.92 ± 0.17 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log index fef8ac0..a37013f 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.40 ± 4.11 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.37 ± 0.03 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.69 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log index e795bb5..7aedfaa 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1039.16 ± 53.94 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 59.16 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1012.11 ± 7.91 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.92 ± 0.21 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index cacca70..4a658bc 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 174.67 ± 4.22 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.98 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 205.56 ± 0.23 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.62 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 3e8cb37..9239487 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 823.08 ± 48.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 66.14 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 828.53 ± 4.66 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.31 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 7e07c93..a200dea 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 112.99 ± 0.13 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 27.35 ± 0.07 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 110.50 ± 0.14 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.87 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index 9fd3aa4..29ec4d4 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1064.73 ± 70.49 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 68.93 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1145.66 ± 9.68 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 68.15 ± 0.11 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log index f189e87..3dfccf2 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.30 ± 3.42 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 34.18 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 193.03 ± 0.86 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.02 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log index 33abf49..e6d9faa 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1209.23 ± 7.46 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.31 ± 13.28 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.72 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log index 1e0bd42..31bdd1d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 177.01 ± 5.01 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.40 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 212.38 ± 0.33 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.49 ± 0.23 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log index 3c417e5..0031201 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.91 ± 9.78 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.48 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.83 ± 7.05 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.05 ± 0.68 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log index d871650..1b4492c 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 175.56 ± 3.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 212.45 ± 0.21 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.52 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index 22e9a0f..9642b43 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1263.87 ± 7.23 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.78 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1170.01 ± 7.53 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 65.12 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log index 0afdf48..ad3e381 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.20 ± 8.55 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.48 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 291.28 ± 0.14 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log index 5e79f7a..a288f4e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1260.69 ± 6.89 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 68.94 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.76 ± 7.70 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 64.97 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log index a96e13b..b190589 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.34 ± 7.55 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 33.52 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 290.12 ± 2.48 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.65 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log index 3b0cc22..12481a0 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1241.85 ± 15.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.57 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1178.27 ± 10.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.46 ± 0.62 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log index 5027639..95d24a2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.98 ± 4.28 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.43 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.14 ± 0.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.83 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log index 70fc750..ed6c60e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1229.55 ± 20.23 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 72.45 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.23 ± 12.92 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.65 ± 0.62 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log index 0b7f117..8b694cc 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 176.47 ± 4.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 35.44 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.05 ± 0.22 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 34.76 ± 0.25 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index f27f9db..b1b4b06 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 846.24 ± 47.60 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 86.32 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 882.98 ± 3.84 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.84 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log index c8693bb..0e42549 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 114.18 ± 0.07 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 30.07 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 111.65 ± 0.08 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.03 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index 4e7288a..3b18735 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp512 | 1005.90 ± 6.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg128 | 79.55 ± 6.96 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1290.50 ± 7.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.79 ± 0.18 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log index 15abf33..253d842 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 153.83 ± 3.76 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.44 ± 0.05 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 194.26 ± 0.86 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.04 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log new file mode 100644 index 0000000..eaf9c62 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 613.58 ± 2.84 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.81 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..efca4d7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 447.94 ± 2.75 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.06 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..ba7778f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 583.48 ± 105.06 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.85 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4e99c3f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.93 ± 32.65 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.09 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..029c2b7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 627.46 ± 3.12 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 28.02 ± 0.04 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..be93303 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 527.78 ± 1.05 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.22 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..b272219 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 623.64 ± 17.23 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.16 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9521439 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 512.14 ± 3.00 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.33 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..1ce170b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 622.47 ± 8.34 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.93 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1e6ca64 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 443.00 ± 30.64 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.97 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..1407abc --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 600.08 ± 13.59 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.99 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..57f366f --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 457.86 ± 1.53 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.08 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..afbab06 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 396.36 ± 1.71 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.90 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..a881bd7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.44 ± 0.64 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.08 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..728a4b6 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 500.88 ± 3.30 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 31.74 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..8b67617 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 420.42 ± 0.15 | +| qwen3next 80B.A3B Q8_0 | 86.94 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.05 ± 0.05 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log index f5ae063..2791109 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 193.67 ± 2.12 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.98 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 629.03 ± 5.19 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 31.37 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log index d5132fd..ffe0750 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.45 ± 0.19 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.87 ± 3.54 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 439.65 ± 0.69 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.25 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log index d64554b..ca5a35f 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 263.91 ± 3.82 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.97 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 648.58 ± 5.44 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 30.86 ± 0.77 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log index 206af26..dbd302b 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 296.64 ± 0.50 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.44 ± 4.28 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 457.60 ± 1.26 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.22 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index a915bc9..4ce1624 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.54 ± 4.38 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.45 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 634.72 ± 6.93 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 29.90 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 99b98d0..07b6bfd 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 449.68 ± 1.06 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.10 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 511.83 ± 7.51 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.45 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 3d76bbf..cd84abc 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 592.83 ± 4.39 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 27.75 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 632.78 ± 59.05 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 30.53 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index c1c013e..70d9321 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 448.82 ± 1.02 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 25.07 ± 0.35 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.17 ± 2.44 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.44 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log index b89bb7b..713a371 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 282.60 ± 2.04 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.89 ± 0.20 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 621.13 ± 82.64 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 31.52 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 5d5c0db..aa85e0e 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 238.71 ± 0.62 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.93 ± 4.11 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 467.56 ± 0.55 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.26 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index b8878a5..ee04293 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 590.03 ± 3.05 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.73 ± 0.52 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 661.07 ± 5.80 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 31.54 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index be1cd00..ca2c10d 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 413.78 ± 0.61 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.54 ± 3.30 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 432.86 ± 0.82 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.20 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 35f7add..c349ffd 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 426.39 ± 3.26 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 31.84 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 436.50 ± 7.59 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 34.72 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 9ed3aa4..e177acc 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 228.41 ± 1.50 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 22.47 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 218.02 ± 0.85 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 28.50 ± 0.07 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 19a2fa7..f7c91c0 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp512 | 509.22 ± 20.34 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg128 | 29.92 ± 0.05 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 587.86 ± 37.36 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 36.28 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log index faa295c..25f1422 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 370.94 ± 32.12 | -| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | tg32 @ d32768 | 26.00 ± 0.20 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 453.76 ± 0.75 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 31.67 ± 0.06 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log new file mode 100644 index 0000000..6190e09 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 305.11 ± 1.38 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.18 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..3f12afa --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 246.59 ± 0.45 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.49 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..c769c07 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 313.18 ± 2.32 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.62 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6cacb57 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 243.74 ± 1.45 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.51 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..83fa89a --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 311.68 ± 1.84 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 18.77 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..995277c --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 273.72 ± 1.22 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..9330711 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 306.06 ± 2.55 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.25 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..4066e47 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 270.84 ± 0.74 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.98 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..0bb4c16 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.27 ± 4.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.66 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..482822d --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 235.12 ± 5.24 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.36 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..3c70313 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 310.73 ± 1.65 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.61 ± 0.11 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..22ef68a --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 241.56 ± 5.74 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.35 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..2aa7628 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 183.05 ± 1.84 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.31 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..9159adc --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 102.85 ± 0.15 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 18.76 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..ebcc2cb --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp512 | 239.56 ± 7.45 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg128 | 21.68 ± 0.38 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..ba5992f --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 203.34 ± 0.47 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.09 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log new file mode 100644 index 0000000..88d3daf --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 508.43 ± 3.18 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.65 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..b6be3f4 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 423.84 ± 0.73 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.64 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..388df5f --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 512.17 ± 3.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..8693fc9 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 382.51 ± 1.54 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.63 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..955738e --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 544.11 ± 3.06 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.40 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..19940ae --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 407.19 ± 1.96 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..23b8db1 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 550.28 ± 3.67 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.23 ± 0.18 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..221ccc6 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 405.13 ± 1.85 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.72 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..3151c98 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 11.33 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.69 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..25444a3 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.64 ± 9.63 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.18 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..532dfcd --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.57 ± 8.20 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.78 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..24b9961 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 379.36 ± 1.33 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.20 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..ec9c2e2 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.56 ± 0.40 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.56 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..af1901a --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.32 ± 0.17 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.95 ± 0.00 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..d914309 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 309.96 ± 4.20 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.79 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..a17d6c3 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.85 ± 0.77 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.39 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log new file mode 100644 index 0000000..91bf9ac --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1071.38 ± 11.20 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.78 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log new file mode 100644 index 0000000..15f4159 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 669.09 ± 1.13 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.19 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log new file mode 100644 index 0000000..1c89299 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1093.96 ± 6.37 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.95 ± 0.30 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..18626cd --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 661.40 ± 0.76 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.13 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..77bc2b3 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1079.44 ± 6.76 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.46 ± 0.01 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..28cdd02 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 762.29 ± 2.68 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.46 ± 0.02 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..e8d6552 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1082.35 ± 6.79 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 46.48 ± 0.04 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..e352549 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 777.89 ± 1.08 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.24 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..f34ef08 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1092.86 ± 9.42 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.16 ± 0.30 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..ca64073 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.39 ± 2.00 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.90 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log new file mode 100644 index 0000000..90d466f --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1113.86 ± 6.42 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.10 ± 0.31 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..558b9cf --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 655.89 ± 1.47 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.98 ± 0.03 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..a947901 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 661.63 ± 3.14 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.16 ± 0.08 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..dcdc814 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 288.86 ± 0.53 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 44.24 ± 0.09 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log new file mode 100644 index 0000000..bc44011 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.40 ± 39.22 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.13 ± 0.07 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..2945779 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.55 ± 0.64 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 48.93 ± 0.13 | + +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log index a9d4fa7..5893093 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.33 ± 0.27 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 854.20 ± 6.85 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.20 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log index ee21601..a017aaa 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 232.79 ± 5.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 392.45 ± 16.97 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log index 9ed3a0f..f21710a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.44 ± 0.31 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 877.69 ± 1.71 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log index 45ba7e6..526bc80 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 229.19 ± 6.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 387.73 ± 18.58 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 86e7334..797411c 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 936.69 ± 1.33 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.23 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 869.53 ± 1.47 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log index 9364a43..d664914 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 258.34 ± 1.81 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.63 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 260.25 ± 4.30 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.60 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index 2eda1f4..e90965b 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 935.37 ± 1.09 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.20 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 870.35 ± 1.54 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log index 5f3c340..829f89e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.44 ± 5.27 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.62 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 263.85 ± 5.85 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.61 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log index fb87a4a..116b328 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 943.63 ± 1.62 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 892.78 ± 1.04 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log index 14934cf..77f2bd0 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.59 ± 26.74 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 441.92 ± 38.71 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log index ba60108..2423cc9 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 942.52 ± 1.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 895.09 ± 0.81 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log index 878aaa3..1f6940d 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 376.68 ± 9.34 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.65 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 449.40 ± 28.98 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.63 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 55932ee..a32ca20 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 125.50 ± 0.06 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.45 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 122.72 ± 0.06 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.31 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 41a43d8..ba09991 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 111.11 ± 0.04 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.40 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 107.34 ± 0.03 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.32 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index edac157..c138142 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp512 | 687.05 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg128 | 14.14 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 513.78 ± 16.10 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.07 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 24d4dab..785012f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 376.92 ± 18.46 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | tg32 @ d32768 | 11.72 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.63 ± 1.57 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.53 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log index 1ff6f83..f1ccf93 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 463.92 ± 1.19 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 324.28 ± 1.23 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.97 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log index b36f842..b0cb077 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.32 ± 3.30 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.68 ± 0.11 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.64 ± 2.06 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log index c643303..8531f51 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 528.00 ± 0.44 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 455.07 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.97 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log index cc34693..714f6c0 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 201.67 ± 1.78 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.74 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.26 ± 1.59 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index 00bcee5..47bc0c4 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.08 ± 0.85 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 426.08 ± 0.81 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.83 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 12a07a9..ca28d62 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 222.44 ± 2.25 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.69 ± 3.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.68 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 97f96ee..f180c49 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 508.48 ± 0.88 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 440.33 ± 0.38 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.96 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log index 93404ec..5dc5e20 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 220.03 ± 0.98 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.85 ± 0.64 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.69 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log index d8498c8..ea47115 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 549.57 ± 2.42 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.09 ± 1.03 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log index 08d310a..ceac7ca 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.98 ± 0.94 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.83 ± 1.03 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.68 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log index 59af47c..2c5cf1e 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 529.01 ± 0.98 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.02 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 452.21 ± 1.03 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log index 359ca8d..e630a45 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 204.92 ± 2.92 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.73 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 181.81 ± 3.26 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.70 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index db92211..f8267e7 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 9.32 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.87 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f25ac3cbc25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f25ac3cbfeb] +/lib64/libggml-base.so.0(+0x16669) [0x7f25ac3de669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f25abb42bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f25abb2cd3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f25abb42ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f25ac488b68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f25ac56efd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f25ac3e7092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f25afcfce70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f25afcff445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f25afd05aaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f25afd0742e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x419a10] +/lib64/libc.so.6(+0x35b5) [0x7f25ab8135b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f25ab813668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 7a34429..17cbfe3 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,23 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 9.20 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.60 ± 0.00 | - -build: e0c93af2a (7938) +/lib64/libggml-base.so.0(+0x3c25) [0x7f5b5347fc25] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5b5347ffeb] +/lib64/libggml-base.so.0(+0x16669) [0x7f5b53492669] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f5b52bf6bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f5b52be0d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f5b52bf6ea8] +/lib64/libggml-vulkan.so.0(+0x16b68) [0x7f5b5353cb68] +/lib64/libggml-vulkan.so.0(+0xfcfd0) [0x7f5b53622fd0] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f5b5349b092] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f5b56db0e70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f5b56db3445] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f5b56db9aaf] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f5b56dbb42e] +/usr/sbin/llama-bench() [0x41cc3b] +/usr/sbin/llama-bench() [0x41977f] +/lib64/libc.so.6(+0x35b5) [0x7f5b528c75b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5b528c7668] +/usr/sbin/llama-bench() [0x41b595] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index 858bf96..be2d8b8 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp512 | 123.07 ± 0.27 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg128 | 3.92 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 91.65 ± 0.40 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.98 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index 767256d..8fe70f2 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 82.96 ± 0.72 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | tg32 @ d32768 | 3.66 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 81.52 ± 0.33 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log index c50aeb9..92de1c6 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2870.77 ± 12.89 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2700.94 ± 1.79 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.69 ± 0.66 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log index 861d2dd..ab19336 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1639.03 ± 15.14 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.51 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1564.27 ± 18.55 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.80 ± 0.48 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log index 7ec3e36..76309ba 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2807.93 ± 16.33 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.66 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2691.00 ± 4.60 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.86 ± 0.64 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log index aba12ed..85d1206 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1262.15 ± 24.34 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.54 ± 0.04 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1563.05 ± 19.77 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.95 ± 0.08 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index 6b110c7..7fd54a5 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2891.85 ± 2.60 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.18 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2719.77 ± 6.47 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 73.32 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log index 71df791..a88f066 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1600.62 ± 30.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.21 ± 0.11 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1687.79 ± 33.87 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.09 ± 0.16 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 98f27b4..ba6263c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2893.75 ± 3.92 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 82.15 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2713.06 ± 9.07 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.85 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log index d1578cd..ea96dc2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1419.18 ± 40.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 60.24 ± 0.11 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1681.29 ± 17.71 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.11 ± 0.29 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log index fe1a281..f37ea3e 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2805.65 ± 13.25 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.35 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2666.49 ± 3.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.31 ± 0.89 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log index ef479c4..769b2bf 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1215.66 ± 10.33 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.02 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1530.18 ± 18.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.72 ± 0.36 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log index da4aac2..0e4d278 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2800.57 ± 47.75 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 85.47 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2674.53 ± 5.86 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.93 ± 0.64 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log index e240976..d6379ae 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1214.20 ± 13.26 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 62.03 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1534.58 ± 20.23 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.65 ± 0.44 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index 6ebe735..9325e89 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 657.19 ± 0.41 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 86.55 ± 0.10 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 647.40 ± 0.53 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 79.07 ± 0.08 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log index 404a158..e54640a 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 493.70 ± 0.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 58.57 ± 0.11 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 455.63 ± 1.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 54.86 ± 0.17 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index 0537089..ed9c6f1 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp512 | 1977.82 ± 204.87 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg128 | 91.09 ± 3.96 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 2479.97 ± 41.40 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 87.24 ± 0.13 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log index e118216..c008393 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 1149.92 ± 30.21 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | tg32 @ d32768 | 67.86 ± 0.22 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 1759.67 ± 6.72 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 64.91 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log index f098ce7..83045a0 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.09 ± 1.36 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.77 ± 0.73 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 631.59 ± 4.27 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log index 0d8b56a..dd99083 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.33 ± 0.32 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.51 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 303.63 ± 0.57 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.07 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log index d6ccfa5..8ff8feb 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.23 ± 1.39 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.06 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 610.41 ± 53.09 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.34 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log index 9bfd24d..d0b590a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 218.30 ± 0.10 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.08 ± 4.33 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 302.82 ± 1.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.12 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index a05b582..492fecb 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 681.58 ± 4.94 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 50.85 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 648.44 ± 6.33 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 49.85 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log index 5948f34..c6c534c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 396.76 ± 35.92 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.13 ± 0.05 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 442.64 ± 0.84 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.69 ± 0.34 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index abd20b5..8d1b5bf 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 678.97 ± 4.29 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 51.88 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.43 ± 31.93 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.99 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log index e3aae6c..c1bacb6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 400.64 ± 35.51 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 37.97 ± 3.34 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 438.75 ± 1.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.75 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log index 5365be2..d70f3d4 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 649.28 ± 39.54 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.00 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 596.69 ± 97.42 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.38 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log index c611dc9..9670ec2 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 259.94 ± 5.81 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.30 ± 0.03 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.13 ± 1.66 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.43 ± 8.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log index 83fdee3..0857b4a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 666.65 ± 12.50 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.05 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 641.07 ± 11.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.35 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log index 420ec5d..a189399 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 261.35 ± 6.47 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 40.36 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 314.50 ± 0.28 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.04 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 4e6b4ce..e8675e6 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 643.04 ± 39.69 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 54.00 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 576.81 ± 2.43 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 51.18 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log index 394417c..315ed48 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 195.45 ± 2.65 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 37.02 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 213.74 ± 0.68 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 34.52 ± 0.07 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 40087c2..c0ad41d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp512 | 597.02 ± 9.82 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg128 | 57.38 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 633.21 ± 13.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.15 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log index e8acd38..fae75b7 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 278.37 ± 7.19 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | tg32 @ d32768 | 42.78 ± 0.09 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 287.49 ± 1.21 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 42.67 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log index 78c56b0..5e219c6 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 547.85 ± 6.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1638.53 ± 13.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.67 ± 0.07 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log index e4ebb3b..ca3a5f0 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.32 ± 0.67 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.50 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 488.89 ± 0.54 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.91 ± 0.03 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log index bb60d45..6094846 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.41 ± 6.71 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.52 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1631.29 ± 15.38 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.62 ± 0.09 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log index f06a549..f2a466a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 402.65 ± 1.50 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.58 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 492.09 ± 1.86 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.93 ± 0.04 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index 77a4b05..a7cf323 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1779.88 ± 16.15 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.26 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1691.01 ± 16.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.07 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log index 7323d53..45bb062 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 583.30 ± 9.18 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.85 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.11 ± 1.16 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.02 ± 0.07 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index 537f920..a3bdd9d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1785.44 ± 15.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.22 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1690.30 ± 13.53 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.00 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log index 9599307..edcc380 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 600.15 ± 13.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.78 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 730.49 ± 1.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.89 ± 0.05 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log index adb683c..932a36a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1742.62 ± 12.05 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.51 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.50 ± 13.61 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.68 ± 0.10 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log index 46f28e1..65c4caa 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 428.95 ± 5.63 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.82 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.77 ± 2.81 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.46 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log index c06bd1f..50262df 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 1730.96 ± 9.70 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.53 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1642.70 ± 14.12 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.75 ± 0.10 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log index e0f62d9..f425c83 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 425.86 ± 3.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 57.90 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 507.84 ± 1.97 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.32 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index fd7fc59..fa14b74 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1300.97 ± 78.99 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 77.58 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1303.99 ± 6.84 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 73.68 ± 0.08 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log index 92c9d52..99d5ac7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 337.80 ± 4.40 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 53.06 ± 0.10 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 364.73 ± 0.33 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.99 ± 0.21 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 5534eab..917f775 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp512 | 1397.71 ± 70.15 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg128 | 80.99 ± 0.06 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1577.96 ± 12.64 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 78.94 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log index 9f1beb7..97ddca5 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 416.91 ± 7.90 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | tg32 @ d32768 | 60.56 ± 0.77 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 541.12 ± 0.20 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 60.77 ± 0.08 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log index 4aeb30d..99ce5e7 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.58 ± 0.25 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.55 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1543.27 ± 2.63 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.61 ± 0.14 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log index a671c82..0dc8ee6 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 144.34 ± 1.03 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 165.68 ± 0.88 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log index 8caec46..dd483cb 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 548.97 ± 0.14 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.52 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.48 ± 5.61 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.57 ± 0.15 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log index 259fde9..5a66ad0 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 143.70 ± 0.31 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.31 ± 1.47 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index d34295f..5103c84 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1597.02 ± 1.89 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1528.37 ± 6.40 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.14 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log index 0f41bbb..e0b84ff 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.62 ± 1.29 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.93 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.35 ± 2.62 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index ed7cd83..acaa1cf 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1598.36 ± 1.08 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.01 ± 0.02 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1521.12 ± 5.74 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.22 ± 0.02 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log index e9332b6..15d1b59 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 191.18 ± 2.26 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.94 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.03 ± 1.28 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.97 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log index dd9ab75..6e1ba6a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.62 ± 1.92 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.17 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1507.68 ± 1.23 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.41 ± 0.14 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log index 8634ecc..c083ed2 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 210.19 ± 4.57 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.97 ± 1.96 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log index 08f165c..c51b8f4 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 1590.40 ± 2.71 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 51.21 ± 0.01 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1508.58 ± 2.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.44 ± 0.15 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log index 355d6a7..25dcbb6 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 216.78 ± 3.22 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.56 ± 0.00 | +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 206.54 ± 0.91 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.62 ± 0.00 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index b7dc396..a1c3881 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 349.95 ± 0.30 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 56.00 ± 0.22 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 324.82 ± 0.45 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.43 ± 0.14 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log index 34caa8e..0c19fe3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 152.53 ± 0.11 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 9.29 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 144.29 ± 1.09 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index e89bb38..f0e5a2f 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp512 | 1355.55 ± 2.34 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg128 | 55.88 ± 0.13 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1313.97 ± 1.29 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.59 ± 0.05 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log index 2ca9650..50c5ec7 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log @@ -1,8 +1,8 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | pp2048 @ d32768 | 246.20 ± 1.24 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | tg32 @ d32768 | 8.76 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 208.18 ± 2.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.52 ± 0.01 | -build: e0c93af2a (7938) +build: 2405d59cb (8577) diff --git a/benchmark/results/system_info.json b/benchmark/results/system_info.json index b7fac37..2efeb7c 100644 --- a/benchmark/results/system_info.json +++ b/benchmark/results/system_info.json @@ -1 +1 @@ -{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.18.5-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", "timestamp": "04 Feb 2026"} +{"distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", "timestamp": "29 Mar 2026"} diff --git a/docs/results.json b/docs/results.json index 3f9ca19..ce60d51 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,13 +1,17 @@ { "meta": { - "generated_at": "2026-02-09T13:29:25Z", + "generated_at": "2026-03-30T06:55:07Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", - "kernel": "6.18.5-200.fc43.x86_64", - "linux_firmware": "linux-firmware-20260110-1.fc43.noarch", - "timestamp": "04 Feb 2026" + "kernel": "6.19.9-200.fc43.x86_64", + "linux_firmware": "linux-firmware-20260309-1.fc43.noarch", + "timestamp": "29 Mar 2026" }, "llamacpp_builds": [ + { + "hash": "2405d59cb", + "number": "8577" + }, { "hash": "e0c93af2a", "number": "7938" @@ -36,377 +40,349 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 72.93, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.95, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.57, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 71.58, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.99, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.48, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 79.51, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.72, + "tps_mean": 78.37, "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.57, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.15, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.96, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 79.24, - "tps_std": 0.1, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.07, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", "fa": true, "context": "default", "context_tokens": null, - "test": "tg128", - "tps_mean": 2.67, + "test": "pp512", + "tps_mean": 78.3, "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.08, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 78.37, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.56, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 78.63, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -414,8 +390,36 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" } }, { @@ -428,13 +432,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.53, + "tps_mean": 17.58, "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -442,8 +446,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -456,13 +460,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.15, - "tps_std": 0.01, + "tps_mean": 2.08, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -470,8 +474,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -484,13 +488,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 78.28, - "tps_std": 0.06, + "tps_mean": 80.49, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -498,8 +502,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -512,13 +516,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.98, - "tps_std": 0.04, + "tps_mean": 2.99, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -526,8 +530,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -540,13 +544,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.18, - "tps_std": 0.03, + "tps_mean": 21.15, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -554,8 +558,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -568,13 +572,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.06, - "tps_std": 0.02, + "tps_mean": 2.07, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -582,8 +586,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -596,13 +600,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 80.59, - "tps_std": 0.1, + "tps_mean": 78.97, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -610,8 +614,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -630,7 +634,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -638,8 +642,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -652,13 +656,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.27, - "tps_std": 0.02, + "tps_mean": 21.17, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -666,8 +670,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -686,7 +690,7 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -694,8 +698,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -707,52 +711,21 @@ "fa": true, "context": "default", "context_tokens": null, - "test": "pp512", - "tps_mean": 17.65, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, + "params_b": null, + "file_size_gib": null, + "name_params_b": 123.0, "quant": "Q4_K_XL", "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } + "build": null }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", @@ -763,52 +736,21 @@ "fa": true, "context": "longctx32768", "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 4.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, + "params_b": null, + "file_size_gib": null, + "name_params_b": 123.0, "quant": "Q4_K_XL", "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } + "build": null }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", @@ -820,13 +762,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 54.76, - "tps_std": 11.46, + "tps_mean": 47.2, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -834,8 +776,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -848,13 +790,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.0, + "tps_mean": 2.99, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -862,8 +804,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -876,13 +818,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 7.15, + "tps_mean": 7.39, "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -890,8 +832,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -904,13 +846,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 2.27, + "tps_mean": 2.6, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 125.03, "file_size_gib": 70.31, "name_params_b": 125.03, @@ -918,8 +860,8 @@ "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -932,13 +874,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 454.95, - "tps_std": 1.9, + "tps_mean": 435.72, + "tps_std": 2.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -946,8 +888,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -960,13 +902,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.26, - "tps_std": 0.03, + "tps_mean": 21.38, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -974,8 +916,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -988,13 +930,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 94.79, - "tps_std": 0.56, + "tps_mean": 87.93, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1002,8 +944,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1016,13 +958,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 16.48, - "tps_std": 0.09, + "tps_mean": 15.33, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1030,8 +972,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1044,13 +986,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 425.21, - "tps_std": 1.79, + "tps_mean": 403.83, + "tps_std": 1.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1058,8 +1000,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1072,13 +1014,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.41, - "tps_std": 0.03, + "tps_mean": 21.37, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1086,8 +1028,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1100,13 +1042,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 93.83, - "tps_std": 0.4, + "tps_mean": 87.49, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1114,8 +1056,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1128,13 +1070,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 16.55, + "tps_mean": 15.33, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1142,8 +1084,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1156,13 +1098,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 407.15, - "tps_std": 2.05, + "tps_mean": 393.19, + "tps_std": 1.92, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1170,8 +1112,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1184,13 +1126,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.51, - "tps_std": 0.03, + "tps_mean": 19.36, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1198,8 +1140,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1212,13 +1154,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 101.09, - "tps_std": 0.37, + "tps_mean": 94.74, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1226,8 +1168,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1240,13 +1182,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 16.23, - "tps_std": 0.0, + "tps_mean": 15.06, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1254,8 +1196,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1268,13 +1210,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 414.23, - "tps_std": 2.09, + "tps_mean": 393.15, + "tps_std": 2.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1282,8 +1224,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1296,13 +1238,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.11, + "tps_mean": 21.11, "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1310,8 +1252,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1324,13 +1266,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 100.06, - "tps_std": 0.38, + "tps_mean": 94.55, + "tps_std": 0.3, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1338,8 +1280,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1352,13 +1294,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 15.97, - "tps_std": 0.45, + "tps_mean": 15.18, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1366,8 +1308,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1380,13 +1322,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 489.62, - "tps_std": 3.63, + "tps_mean": 476.23, + "tps_std": 2.21, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1394,8 +1336,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1408,13 +1350,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.4, - "tps_std": 0.02, + "tps_mean": 21.36, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1422,8 +1364,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1436,13 +1378,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 92.48, - "tps_std": 1.13, + "tps_mean": 86.79, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1450,8 +1392,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1464,13 +1406,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 16.5, + "tps_mean": 15.32, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1478,8 +1420,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1492,13 +1434,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 425.86, - "tps_std": 2.29, + "tps_mean": 401.13, + "tps_std": 5.52, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1506,8 +1448,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1520,13 +1462,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.41, + "tps_mean": 21.35, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1534,8 +1476,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1548,13 +1490,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 92.06, - "tps_std": 0.08, + "tps_mean": 85.92, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 29.94, "file_size_gib": 55.79, "name_params_b": 29.94, @@ -1562,8 +1504,8 @@ "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -1576,475 +1518,8446 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", + "tps_mean": 15.3, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 114.88, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 10.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 342.11, + "tps_std": 5.47, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.48, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 102.93, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.82, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 905.79, + "tps_std": 2.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 32.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 91.97, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.58, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 893.68, + "tps_std": 28.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 33.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 91.83, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 903.39, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 99.84, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 906.25, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 32.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 100.07, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 902.85, + "tps_std": 59.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 33.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 89.75, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 906.42, + "tps_std": 2.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 33.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 89.99, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 499.98, + "tps_std": 1.29, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 39.01, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 853.46, + "tps_std": 6.7, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 40.38, + "tps_std": 1.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 133.27, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.17, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 673.71, + "tps_std": 1.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.35, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.3, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 748.37, + "tps_std": 4.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 165.11, + "tps_std": 1.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 706.16, + "tps_std": 3.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.61, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 710.61, + "tps_std": 4.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 161.22, + "tps_std": 2.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 881.45, + "tps_std": 3.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 213.5, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 735.98, + "tps_std": 5.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 205.29, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 14.0, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 14.0, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 166.51, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 7.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 72.65, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 259.71, + "tps_std": 1.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 302.29, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.58, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 263.95, + "tps_std": 1.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 305.52, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 260.98, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 324.69, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 262.3, + "tps_std": 1.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 324.11, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.45, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 228.5, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 284.43, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 236.84, + "tps_std": 1.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.04, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 290.04, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 140.84, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 13.97, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 106.21, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 13.21, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 190.66, + "tps_std": 7.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.41, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 191.12, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 13.85, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 194.62, + "tps_std": 1.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.93, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.41, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 194.32, + "tps_std": 1.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.8, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.97, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.63, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 197.89, + "tps_std": 1.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.96, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 70.32, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.22, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 198.04, + "tps_std": 1.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 13.11, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 70.27, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.02, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 197.46, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 48.93, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.34, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 194.0, + "tps_std": 1.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 49.33, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.46, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 106.76, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 26.54, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 158.81, + "tps_std": 2.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 17.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 28.25, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.49, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 474.42, + "tps_std": 2.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 189.58, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.53, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 479.78, + "tps_std": 2.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 180.26, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 556.96, + "tps_std": 2.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 238.1, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.26, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 556.8, + "tps_std": 4.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 237.85, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.46, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 503.77, + "tps_std": 3.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 182.59, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.25, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 482.99, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 175.78, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 171.39, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.48, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 73.62, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.49, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 369.06, + "tps_std": 2.58, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 9.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 140.81, + "tps_std": 0.52, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 994.33, + "tps_std": 11.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 55.87, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 209.15, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.88, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 994.22, + "tps_std": 10.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 55.84, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 208.93, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1009.37, + "tps_std": 9.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 53.13, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 285.49, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.53, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1012.69, + "tps_std": 9.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 54.94, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 285.25, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 30.5, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1023.24, + "tps_std": 11.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 56.92, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 206.37, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1012.11, + "tps_std": 7.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 56.92, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 205.56, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.62, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 828.53, + "tps_std": 4.66, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 63.31, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 110.5, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 25.87, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1145.66, + "tps_std": 9.68, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.15, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 193.03, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.02, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1149.31, + "tps_std": 13.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 67.72, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 212.38, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.49, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1149.83, + "tps_std": 7.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.05, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 212.45, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.52, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1170.01, + "tps_std": 7.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 65.12, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 291.28, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.65, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1167.76, + "tps_std": 7.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 64.97, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 290.12, + "tps_std": 2.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 32.65, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1178.27, + "tps_std": 10.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.46, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 209.14, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.83, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1172.23, + "tps_std": 12.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.65, + "tps_std": 0.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 209.05, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.76, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 882.98, + "tps_std": 3.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 80.84, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 111.65, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.03, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1290.5, + "tps_std": 7.83, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 83.79, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 194.26, + "tps_std": 0.86, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.04, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 613.58, + "tps_std": 2.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 447.94, + "tps_std": 2.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 583.48, + "tps_std": 105.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 439.93, + "tps_std": 32.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 627.46, + "tps_std": 3.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.02, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 527.78, + "tps_std": 1.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 623.64, + "tps_std": 17.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 512.14, + "tps_std": 3.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.33, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 622.47, + "tps_std": 8.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 443.0, + "tps_std": 30.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 600.08, + "tps_std": 13.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 457.86, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.08, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 396.36, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 30.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 208.44, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.08, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 500.88, + "tps_std": 3.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 420.42, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003", + "model_clean": "Qwen3-Coder-Next-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.05, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.94, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Coder-Next-UD-Q8_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 629.03, + "tps_std": 5.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.37, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 439.65, + "tps_std": 0.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.25, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 648.58, + "tps_std": 5.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 30.86, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 457.6, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.22, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 634.72, + "tps_std": 6.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.9, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 511.83, + "tps_std": 7.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.45, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 632.78, + "tps_std": 59.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 30.53, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 507.17, + "tps_std": 2.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 27.44, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 621.13, + "tps_std": 82.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 467.56, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.26, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 661.07, + "tps_std": 5.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 31.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 432.86, + "tps_std": 0.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.2, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 436.5, + "tps_std": 7.59, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 34.72, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 218.02, + "tps_std": 0.85, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 28.5, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 587.86, + "tps_std": 37.36, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 36.28, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 453.76, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.67, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 305.11, + "tps_std": 1.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 246.59, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.49, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 313.18, + "tps_std": 2.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 243.74, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", "tps_mean": 16.51, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 106.42, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 10.87, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 6.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 333.1, - "tps_std": 6.48, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 9.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 78.99, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.13, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 398.34, - "tps_std": 1.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 35.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 90.22, - "tps_std": 4.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.35, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 398.87, - "tps_std": 1.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 36.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 92.13, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.56, - "tps_std": 1.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -2052,27 +9965,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 947.86, - "tps_std": 2.03, + "tps_mean": 311.68, + "tps_std": 1.84, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -2080,27 +9993,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 33.77, + "tps_mean": 18.77, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -2108,27 +10021,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 84.85, - "tps_std": 1.04, + "tps_mean": 273.72, + "tps_std": 1.22, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -2136,27 +10049,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 21.89, - "tps_std": 0.04, + "tps_mean": 17.91, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", "env_variant": "hblt0", @@ -2164,4507 +10077,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 952.84, - "tps_std": 2.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 35.23, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 84.01, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.97, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 983.72, - "tps_std": 3.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 36.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 80.32, - "tps_std": 1.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.31, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 955.1, - "tps_std": 4.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 36.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 81.34, - "tps_std": 1.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 368.78, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 40.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 6.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 877.18, - "tps_std": 8.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 40.07, - "tps_std": 0.78, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 90.27, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "GLM-4.7-Flash-UD-Q8_K_XL", - "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 23.07, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 29.94, - "file_size_gib": 32.7, - "name_params_b": 29.94, - "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 48.83, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 29.25, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.45, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 49.38, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 29.17, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.04, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.22, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.43, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.83, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.4, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 155.06, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.36, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 151.7, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.35, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 21.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 8.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 99.39, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.76, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 11.79, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.44, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 800.17, - "tps_std": 1.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 169.18, - "tps_std": 1.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 803.22, - "tps_std": 2.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.11, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 764.18, - "tps_std": 1.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.22, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 766.68, - "tps_std": 1.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 164.84, - "tps_std": 1.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 990.88, - "tps_std": 3.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 172.42, - "tps_std": 3.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 799.71, - "tps_std": 2.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.19, - "tps_std": 1.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 19.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 8.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.69, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 222.01, - "tps_std": 0.94, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 7.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 76.47, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Ministral-3-14B-Instruct-2512-BF16", - "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 13.51, - "file_size_gib": 25.16, - "name_params_b": 13.51, - "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 393.61, - "tps_std": 2.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 42.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 562.85, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 388.54, - "tps_std": 2.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 42.61, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 564.71, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1026.87, - "tps_std": 6.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 41.9, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1042.36, - "tps_std": 2.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1027.41, - "tps_std": 6.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 42.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1038.86, - "tps_std": 3.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1070.15, - "tps_std": 5.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 42.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 908.79, - "tps_std": 27.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.91, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1038.67, - "tps_std": 2.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 42.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 821.93, - "tps_std": 29.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 676.59, - "tps_std": 50.83, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 47.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 342.52, - "tps_std": 0.46, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.25, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 951.76, - "tps_std": 41.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 799.39, - "tps_std": 0.69, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 41.15, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 31.58, - "file_size_gib": 37.66, - "name_params_b": 31.58, - "quant": "Q8_K_XL", - "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 202.36, - "tps_std": 3.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 41.36, - "tps_std": 0.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.65, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 200.1, - "tps_std": 8.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.04, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 41.53, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.5, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 205.05, - "tps_std": 3.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.98, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 51.11, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.63, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 203.41, - "tps_std": 3.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.0, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 51.19, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.58, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 200.04, - "tps_std": 4.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.19, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 41.42, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.94, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 197.48, - "tps_std": 10.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.2, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 41.6, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.81, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 119.82, - "tps_std": 3.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 17.75, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 27.41, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 133.28, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.98, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 30.79, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 489.11, - "tps_std": 2.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.18, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 214.97, - "tps_std": 1.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.57, - "tps_std": 1.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 492.32, + "tps_mean": 306.06, "tps_std": 2.55, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.23, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 207.64, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.84, - "tps_std": 0.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 576.03, - "tps_std": 3.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 249.94, - "tps_std": 1.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.18, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 569.42, - "tps_std": 8.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", "env_variant": "hblt0", @@ -6672,27 +10105,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.07, + "tps_mean": 19.25, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", "env_variant": "hblt0", @@ -6700,167 +10133,167 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 250.24, - "tps_std": 0.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.77, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 512.1, - "tps_std": 4.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.27, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 216.18, + "tps_mean": 270.84, "tps_std": 0.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.71, + "tps_mean": 17.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 314.27, + "tps_std": 4.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.66, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 235.12, + "tps_std": 5.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.36, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6868,27 +10301,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 493.72, - "tps_std": 3.45, + "tps_mean": 310.73, + "tps_std": 1.65, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6896,27 +10329,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.32, - "tps_std": 0.0, + "tps_mean": 19.61, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6924,27 +10357,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 209.02, - "tps_std": 0.16, + "tps_mean": 241.56, + "tps_std": 5.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "rocm7-nightlies-hblt0", "env_base": "rocm7", "env_variant": "nightlies-hblt0", @@ -6952,27 +10385,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.67, + "tps_mean": 18.35, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6980,27 +10413,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 168.95, - "tps_std": 7.69, + "tps_mean": 183.05, + "tps_std": 1.84, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -7008,223 +10441,223 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.62, + "tps_mean": 21.31, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 102.85, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.76, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 239.56, + "tps_std": 7.45, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.68, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 203.34, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.09, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 508.43, + "tps_std": 3.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 75.04, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.68, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 351.97, - "tps_std": 2.56, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 9.42, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 127.67, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.31, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 815.37, - "tps_std": 5.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm-7_2", "env_base": "rocm", "env_variant": "7_2", @@ -7232,2771 +10665,83 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.56, - "tps_std": 4.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.3, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 811.39, - "tps_std": 6.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 171.54, - "tps_std": 4.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1078.99, - "tps_std": 11.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 56.45, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 217.17, - "tps_std": 8.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.94, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1080.52, - "tps_std": 10.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.49, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 218.42, - "tps_std": 7.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.96, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1056.78, - "tps_std": 36.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 59.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 175.4, - "tps_std": 4.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1039.16, - "tps_std": 53.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 59.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.67, - "tps_std": 4.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 31.98, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 823.08, - "tps_std": 48.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 66.14, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 112.99, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.35, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1064.73, - "tps_std": 70.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.93, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 152.3, - "tps_std": 3.42, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.18, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1209.23, - "tps_std": 7.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 71.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 177.01, - "tps_std": 5.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1207.91, - "tps_std": 9.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 71.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 175.56, - "tps_std": 3.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.37, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1263.87, - "tps_std": 7.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.78, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 222.2, - "tps_std": 8.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.48, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1260.69, - "tps_std": 6.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.94, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 222.34, - "tps_std": 7.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1241.85, - "tps_std": 15.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 176.98, - "tps_std": 4.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.43, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1229.55, - "tps_std": 20.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.45, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 176.47, - "tps_std": 4.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.44, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 846.24, - "tps_std": 47.6, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 86.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 114.18, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.07, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1005.9, - "tps_std": 6.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 79.55, - "tps_std": 6.96, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 153.83, - "tps_std": 3.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.44, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 193.67, - "tps_std": 2.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.98, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 217.45, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 23.87, - "tps_std": 3.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 263.91, - "tps_std": 3.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.97, + "tps_mean": 23.65, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 296.64, - "tps_std": 0.5, + "tps_mean": 423.84, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7_2-hblt0", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2", "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 23.44, - "tps_std": 4.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 592.54, - "tps_std": 4.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.45, + "tps_mean": 21.64, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 449.68, - "tps_std": 1.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.1, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 592.83, - "tps_std": 4.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 448.82, - "tps_std": 1.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.07, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 282.6, - "tps_std": 2.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.89, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 238.71, - "tps_std": 0.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 23.93, - "tps_std": 4.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 590.03, - "tps_std": 3.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.73, - "tps_std": 0.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 413.78, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 23.54, - "tps_std": 3.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 426.39, - "tps_std": 3.26, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 31.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 228.41, - "tps_std": 1.5, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 22.47, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 509.22, - "tps_std": 20.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.92, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 370.94, - "tps_std": 32.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.0, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 79.67, - "file_size_gib": 86.68, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 323.33, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 232.79, - "tps_std": 5.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm-7_2-hblt0", "env_base": "rocm", "env_variant": "7_2-hblt0", @@ -10004,27 +10749,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 324.44, - "tps_std": 0.31, + "tps_mean": 512.17, + "tps_std": 3.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm-7_2-hblt0", "env_base": "rocm", "env_variant": "7_2-hblt0", @@ -10032,27 +10777,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.24, + "tps_mean": 23.69, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm-7_2-hblt0", "env_base": "rocm", "env_variant": "7_2-hblt0", @@ -10060,27 +10805,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 229.19, - "tps_std": 6.79, + "tps_mean": 382.51, + "tps_std": 1.54, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm-7_2-hblt0", "env_base": "rocm", "env_variant": "7_2-hblt0", @@ -10088,27 +10833,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.66, - "tps_std": 0.0, + "tps_mean": 21.63, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10116,27 +10861,923 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 936.69, + "tps_mean": 544.11, + "tps_std": 3.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 407.19, + "tps_std": 1.96, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 550.28, + "tps_std": 3.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.23, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 405.13, + "tps_std": 1.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.72, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 541.57, + "tps_std": 11.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 441.64, + "tps_std": 9.63, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 509.57, + "tps_std": 8.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 379.36, "tps_std": 1.33, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 22.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 122.56, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 11.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 97.32, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 309.96, + "tps_std": 4.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.79, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 258.85, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.39, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1071.38, + "tps_std": 11.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 47.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 669.09, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.19, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1093.96, + "tps_std": 6.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 47.95, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 661.4, + "tps_std": 0.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.13, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1079.44, + "tps_std": 6.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10144,27 +11785,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.23, - "tps_std": 0.0, + "tps_mean": 46.46, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -10172,30 +11813,702 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 258.34, - "tps_std": 1.81, + "tps_mean": 762.29, + "tps_std": 2.68, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.46, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1082.35, + "tps_std": 6.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.48, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 777.89, + "tps_std": 1.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.24, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1092.86, + "tps_std": 9.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 48.16, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 655.39, + "tps_std": 2.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 41.9, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1113.86, + "tps_std": 6.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 48.1, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 655.89, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 41.98, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 661.63, + "tps_std": 3.14, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.16, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 288.86, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 44.24, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1013.4, + "tps_std": 39.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 59.13, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 673.55, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 48.93, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 854.2, + "tps_std": 6.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 392.45, + "tps_std": 16.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -10206,7 +12519,203 @@ "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 877.69, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 387.73, + "tps_std": 18.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 869.53, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 260.25, + "tps_std": 4.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10214,8 +12723,36 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10228,13 +12765,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 935.37, - "tps_std": 1.09, + "tps_mean": 870.35, + "tps_std": 1.54, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10242,8 +12779,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10256,13 +12793,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.2, - "tps_std": 0.02, + "tps_mean": 14.15, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10270,8 +12807,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10284,13 +12821,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 261.44, - "tps_std": 5.27, + "tps_mean": 263.85, + "tps_std": 5.85, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10298,8 +12835,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10312,13 +12849,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.62, + "tps_mean": 11.61, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10326,8 +12863,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10340,13 +12877,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 943.63, - "tps_std": 1.62, + "tps_mean": 892.78, + "tps_std": 1.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10354,8 +12891,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10368,13 +12905,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.25, + "tps_mean": 14.19, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10382,8 +12919,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10396,13 +12933,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 396.59, - "tps_std": 26.74, + "tps_mean": 441.92, + "tps_std": 38.71, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10410,8 +12947,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10424,13 +12961,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.65, + "tps_mean": 11.63, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10438,8 +12975,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10452,13 +12989,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 942.52, - "tps_std": 1.34, + "tps_mean": 895.09, + "tps_std": 0.81, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10466,8 +13003,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10480,13 +13017,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.25, + "tps_mean": 14.19, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10494,8 +13031,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10508,13 +13045,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 376.68, - "tps_std": 9.34, + "tps_mean": 449.4, + "tps_std": 28.98, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10522,8 +13059,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10536,13 +13073,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.65, + "tps_mean": 11.63, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10550,8 +13087,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7-nightlies__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10564,13 +13101,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 125.5, + "tps_mean": 122.72, "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10578,8 +13115,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10592,13 +13129,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.45, - "tps_std": 0.0, + "tps_mean": 14.31, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10606,8 +13143,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10620,13 +13157,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 111.11, - "tps_std": 0.04, + "tps_mean": 107.34, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10634,8 +13171,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10648,13 +13185,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.4, - "tps_std": 0.01, + "tps_mean": 11.32, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10662,8 +13199,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10676,13 +13213,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 687.05, - "tps_std": 0.75, + "tps_mean": 513.78, + "tps_std": 16.1, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10690,8 +13227,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10704,13 +13241,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.14, + "tps_mean": 14.07, "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10718,8 +13255,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10732,13 +13269,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 376.92, - "tps_std": 18.46, + "tps_mean": 258.63, + "tps_std": 1.57, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10746,8 +13283,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10760,13 +13297,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 11.72, + "tps_mean": 11.53, "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 11.77, "file_size_gib": 13.4, "name_params_b": 11.77, @@ -10774,8 +13311,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10788,13 +13325,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 463.92, - "tps_std": 1.19, + "tps_mean": 324.28, + "tps_std": 1.23, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -10802,8 +13339,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10816,13 +13353,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.02, + "tps_mean": 3.97, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -10830,8 +13367,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10844,13 +13381,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 191.32, - "tps_std": 3.3, + "tps_mean": 166.64, + "tps_std": 2.06, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -10858,8 +13395,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -10872,209 +13409,237 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", + "tps_mean": 3.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 455.07, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 188.26, + "tps_std": 1.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 426.08, + "tps_std": 0.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 209.69, + "tps_std": 3.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", "tps_mean": 3.68, - "tps_std": 0.11, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 528.0, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.67, - "tps_std": 1.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 508.08, - "tps_std": 0.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 222.44, - "tps_std": 2.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -11082,36 +13647,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -11124,13 +13661,13 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 508.48, - "tps_std": 0.88, + "tps_mean": 440.33, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -11138,8 +13675,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -11152,13 +13689,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.0, + "tps_mean": 3.96, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -11166,8 +13703,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -11180,13 +13717,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 220.03, - "tps_std": 0.98, + "tps_mean": 202.85, + "tps_std": 0.64, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -11194,8 +13731,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -11208,13 +13745,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 3.72, - "tps_std": 0.0, + "tps_mean": 3.69, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 27.01, "file_size_gib": 50.31, "name_params_b": 27.01, @@ -11222,8 +13759,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -11236,3205 +13773,3143 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 549.57, - "tps_std": 2.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 215.98, - "tps_std": 0.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 529.01, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.92, - "tps_std": 2.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 9.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 9.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 123.07, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 82.96, - "tps_std": 0.72, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2870.77, - "tps_std": 12.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 84.57, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1639.03, - "tps_std": 15.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 61.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2807.93, - "tps_std": 16.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 84.66, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1262.15, - "tps_std": 24.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 61.54, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2891.85, - "tps_std": 2.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1600.62, - "tps_std": 30.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 60.21, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2893.75, - "tps_std": 3.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.15, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1419.18, - "tps_std": 40.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 60.24, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2805.65, - "tps_std": 13.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 85.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1215.66, - "tps_std": 10.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 62.02, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2800.57, - "tps_std": 47.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 85.47, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1214.2, - "tps_std": 13.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 62.03, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 657.19, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 86.55, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 493.7, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.57, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1977.82, - "tps_std": 204.87, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 91.09, - "tps_std": 3.96, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1149.92, - "tps_std": 30.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 67.86, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 181.09, - "tps_std": 1.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.77, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 217.33, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 181.23, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 52.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 218.3, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 34.08, - "tps_std": 4.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 681.58, - "tps_std": 4.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 396.76, - "tps_std": 35.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.13, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 678.97, - "tps_std": 4.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.88, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 400.64, - "tps_std": 35.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.97, - "tps_std": 3.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 649.28, - "tps_std": 39.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 52.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 259.94, - "tps_std": 5.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.3, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 666.65, - "tps_std": 12.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 52.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 261.35, - "tps_std": 6.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 40.36, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 643.04, - "tps_std": 39.69, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 54.0, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 195.45, - "tps_std": 2.65, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.02, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 597.02, - "tps_std": 9.82, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.38, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 278.37, - "tps_std": 7.19, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 42.78, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 547.85, - "tps_std": 6.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 402.32, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 52.5, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 546.41, - "tps_std": 6.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.52, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 402.65, - "tps_std": 1.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 52.58, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1779.88, - "tps_std": 16.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.26, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 583.3, - "tps_std": 9.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.85, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1785.44, - "tps_std": 15.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 600.15, - "tps_std": 13.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1742.62, - "tps_std": 12.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.51, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 428.95, - "tps_std": 5.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.82, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1730.96, - "tps_std": 9.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 425.86, - "tps_std": 3.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.9, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1300.97, - "tps_std": 78.99, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 77.58, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 337.8, - "tps_std": 4.4, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 53.06, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1397.71, - "tps_std": 70.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 80.99, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 416.91, - "tps_std": 7.9, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 60.56, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 549.58, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.55, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2", - "env_base": "rocm", - "env_variant": "7_2", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 144.34, + "tps_mean": 466.09, "tps_std": 1.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 182.83, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.68, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 452.21, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 181.81, + "tps_std": 3.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 91.65, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 81.52, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2700.94, + "tps_std": 1.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 77.69, + "tps_std": 0.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1564.27, + "tps_std": 18.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.8, + "tps_std": 0.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2691.0, + "tps_std": 4.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 77.86, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1563.05, + "tps_std": 19.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.95, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2719.77, + "tps_std": 6.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.32, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1687.79, + "tps_std": 33.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.09, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2713.06, + "tps_std": 9.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.85, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1681.29, + "tps_std": 17.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.11, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2666.49, + "tps_std": 3.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.31, + "tps_std": 0.89, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1530.18, + "tps_std": 18.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.72, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2674.53, + "tps_std": 5.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.93, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1534.58, + "tps_std": 20.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.65, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 647.4, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 79.07, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 455.63, + "tps_std": 1.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 54.86, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2479.97, + "tps_std": 41.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 87.24, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1759.67, + "tps_std": 6.72, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 64.91, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 631.59, + "tps_std": 4.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 303.63, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.07, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 610.41, + "tps_std": 53.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.34, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 302.82, + "tps_std": 1.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.12, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 648.44, + "tps_std": 6.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 49.85, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 442.64, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.69, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 639.43, + "tps_std": 31.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.99, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 438.75, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.75, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 596.69, + "tps_std": 97.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 308.13, + "tps_std": 1.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.43, + "tps_std": 8.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 641.07, + "tps_std": 11.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 314.5, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.04, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 576.81, + "tps_std": 2.43, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.18, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 213.74, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.52, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 633.21, + "tps_std": 13.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 56.15, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 287.49, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 42.67, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1638.53, + "tps_std": 13.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.67, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 488.89, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.91, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1631.29, + "tps_std": 15.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.62, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 492.09, + "tps_std": 1.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.93, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1691.01, + "tps_std": 16.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 730.11, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.02, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1690.3, + "tps_std": 13.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.0, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 730.49, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 56.89, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1668.5, + "tps_std": 13.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.68, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 507.77, + "tps_std": 2.81, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.46, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1642.7, + "tps_std": 14.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.75, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 507.84, + "tps_std": 1.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 57.32, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1303.99, + "tps_std": 6.84, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.68, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 364.73, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 49.99, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1577.96, + "tps_std": 12.64, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.94, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 541.12, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 60.77, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1543.27, + "tps_std": 2.63, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.61, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2", + "env_base": "rocm", + "env_variant": "7_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 165.68, + "tps_std": 0.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -14442,8 +16917,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -14456,13 +16931,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.6, + "tps_mean": 5.65, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -14470,8 +16945,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -14484,685 +16959,685 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 548.97, + "tps_mean": 1539.48, + "tps_std": 5.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.57, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 166.31, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2-hblt0", + "env_base": "rocm", + "env_variant": "7_2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1528.37, + "tps_std": 6.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.14, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 196.35, + "tps_std": 2.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1521.12, + "tps_std": 5.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.22, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 196.03, + "tps_std": 1.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1507.68, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.41, "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.52, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 143.7, - "tps_std": 0.31, + "tps_mean": 206.97, + "tps_std": 1.96, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2-hblt0", - "env_base": "rocm", - "env_variant": "7_2-hblt0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 5.6, + "tps_mean": 5.62, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1508.58, + "tps_std": 2.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.44, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 206.54, + "tps_std": 0.91, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies-hblt0", + "env_base": "rocm7", + "env_variant": "nightlies-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1597.02, - "tps_std": 1.89, + "tps_mean": 324.82, + "tps_std": 0.45, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.01, - "tps_std": 0.01, + "tps_mean": 55.43, + "tps_std": 0.14, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 193.62, + "tps_mean": 144.29, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "2405d59cb", + "number": "8577" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1313.97, "tps_std": 1.29, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1598.36, - "tps_std": 1.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.01, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 191.18, - "tps_std": 2.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1590.62, - "tps_std": 1.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 210.19, - "tps_std": 4.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1590.4, - "tps_std": 2.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.21, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 216.78, - "tps_std": 3.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7-nightlies-hblt0", - "env_base": "rocm7", - "env_variant": "nightlies-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 349.95, - "tps_std": 0.3, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 56.0, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 152.53, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "e0c93af2a", - "number": "7938" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1355.55, - "tps_std": 2.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -15170,8 +17645,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -15184,13 +17659,13 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 55.88, - "tps_std": 0.13, + "tps_mean": 55.59, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -15198,8 +17673,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -15212,13 +17687,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 246.2, - "tps_std": 1.24, + "tps_mean": 208.18, + "tps_std": 2.24, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -15226,8 +17701,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, { @@ -15240,13 +17715,13 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 8.76, - "tps_std": 0.0, + "tps_mean": 8.52, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, - "mmap": null, + "mmap": 0, "params_b": 6.74, "file_size_gib": 3.56, "name_params_b": 6.74, @@ -15254,8 +17729,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "e0c93af2a", - "number": "7938" + "hash": "2405d59cb", + "number": "8577" } }, {