From 7f34f51202e92ae94084a217be3a151485f734ed Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Fri, 28 Nov 2025 17:50:21 +0000 Subject: [PATCH] Added Qwen-3-Next benchmarks --- ...001-of-00002__rocm-7alpha-rocwmma__fa1.log | 10 + ...00002__rocm-7alpha-rocwmma__hblt0__fa1.log | 10 + ..._K_XL-00001-of-00002__rocm-7alpha__fa1.log | 10 + ...0001-of-00002__rocm-7alpha__hblt0__fa1.log | 10 + ...00001-of-00002__rocm6_4_4-rocwmma__fa1.log | 10 + ...f-00002__rocm6_4_4-rocwmma__hblt0__fa1.log | 10 + ...Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log | 10 + ...-00001-of-00002__rocm6_4_4__hblt0__fa1.log | 4 + ...L-00001-of-00002__rocm7.1-rocwmma__fa1.log | 10 + ...-of-00002__rocm7.1-rocwmma__hblt0__fa1.log | 10 + ...D-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log | 10 + ...XL-00001-of-00002__rocm7.1__hblt0__fa1.log | 10 + ...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 + ...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 + ...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 10 + ...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 + ..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 + ..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 + docs/results.json | 952 +++++++++++++++++- 19 files changed, 1121 insertions(+), 1 deletion(-) create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..a77925e --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 245.79 ± 0.39 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 19.30 ± 0.09 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..ace9f20 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 301.76 ± 0.36 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.02 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log new file mode 100644 index 0000000..1e38220 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 249.80 ± 4.99 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 19.17 ± 0.01 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..97628ee --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 305.05 ± 2.25 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.99 ± 0.16 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..b239dc7 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 311.74 ± 3.86 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 22.66 ± 0.25 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f8bb38c --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 313.20 ± 3.93 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.02 ± 0.30 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log new file mode 100644 index 0000000..1975916 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 320.42 ± 0.35 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.06 ± 0.13 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..3ebab6a --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,4 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log new file mode 100644 index 0000000..68ff91e --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 258.30 ± 2.66 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.49 ± 0.24 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..bc9a201 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 322.70 ± 2.10 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.45 ± 0.19 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log new file mode 100644 index 0000000..b530ec3 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 253.35 ± 2.88 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.25 ± 0.15 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log new file mode 100644 index 0000000..b4d9d8e --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 319.09 ± 2.77 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.72 ± 0.03 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log new file mode 100644 index 0000000..b1c1e89 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 254.34 ± 2.60 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.06 ± 0.08 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..8d02cca --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 323.33 ± 1.75 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.37 ± 0.17 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log new file mode 100644 index 0000000..c7880f1 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 242.43 ± 0.28 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 18.35 ± 0.10 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log new file mode 100644 index 0000000..7957072 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 305.22 ± 3.10 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 23.60 ± 0.08 | + +build: c6f7a423c (7189) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..aac7154 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +ggml_vulkan: Device memory allocation of size 614888064 failed. +ggml_vulkan: vk::Device::allocateMemory: ErrorOutOfHostMemory +main: error: failed to load model '/home/kyuz0/models/UD-Q8_K_XL/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf' +✖ ! [vulkan_amdvlk] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log new file mode 100644 index 0000000..7245625 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 324.45 ± 0.42 | +| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 22.31 ± 0.18 | + +build: c6f7a423c (7189) diff --git a/docs/results.json b/docs/results.json index 72568e0..a8b09ff 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2025-11-19T07:33:18Z", + "generated_at": "2025-11-28T15:55:43Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ { @@ -47,6 +47,10 @@ "hash": "bca95ca51", "number": "7036" }, + { + "hash": "c6f7a423c", + "number": "7189" + }, { "hash": "caca0d55c", "number": "7085" @@ -22776,6 +22780,952 @@ "number": "7089" } }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 245.79, + "tps_std": 0.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.3, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 301.76, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.98, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 249.8, + "tps_std": 4.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.17, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 305.05, + "tps_std": 2.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.99, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 311.74, + "tps_std": 3.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.66, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 313.2, + "tps_std": 3.93, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.02, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 320.42, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.06, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 80.0, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 258.3, + "tps_std": 2.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma", + "env_base": "rocm7.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.49, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 322.7, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-rocwmma-hblt0", + "env_base": "rocm7.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.45, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 253.35, + "tps_std": 2.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1", + "env_base": "rocm7.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.25, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 319.09, + "tps_std": 2.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1-hblt0", + "env_base": "rocm7.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.72, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 254.34, + "tps_std": 2.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.06, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 323.33, + "tps_std": 1.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.37, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 242.43, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 18.35, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 305.22, + "tps_std": 3.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.6, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 80.0, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 324.45, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.31, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "c6f7a423c", + "number": "7189" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",