diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index 3d2d9c2..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 125.93 ± 0.26 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 47aebe3..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 135.40 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.69 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index ad3bc31..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 132.28 ± 0.14 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.50 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index f3696a4..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.86 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.70 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index cba7aea..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 125.92 ± 0.27 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index 3f6be05..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.12 ± 0.59 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index b894fe4..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 131.45 ± 0.35 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index ffffffb..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.26 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log index daa1793..ec80d4d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.18 ± 0.37 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.51 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.16 ± 0.25 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index e798784..d16366a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.92 ± 0.21 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.08 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.20 ± 0.28 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index 2d8f0ca..cc456bd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 159.31 ± 0.83 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.34 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 135.87 ± 0.06 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.46 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 5aa0185..cc7c79d 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.67 ± 0.36 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.53 ± 0.45 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.08 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log index e40c1b6..bfe97bd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.02 ± 0.30 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.30 ± 0.14 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.83 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 6554256..15cb662 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 136.15 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.05 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 136.00 ± 0.15 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.93 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log index 5da6d51..92bb7f3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.41 ± 0.61 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.50 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.81 ± 0.78 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.41 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index f00f375..a8e57a0 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 161.32 ± 0.19 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.06 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 132.96 ± 0.49 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.99 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index d34b8a5..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.56 ± 0.11 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.90 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 638b42c..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 127.25 ± 0.57 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 6310d52..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.69 ± 0.57 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.56 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 8605e6d..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 169.19 ± 0.12 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index e2005d4..6522117 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.71 ± 0.12 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 102.61 ± 0.20 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.54 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index f579cee..523ba45 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.97 ± 0.54 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.70 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 67.08 ± 0.15 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.07 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 38f5f9b..9b3654e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 160.39 ± 0.34 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.56 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.71 ± 0.66 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.53 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index f4788eb..7cccce1 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 169.35 ± 0.56 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.65 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.21 ± 0.25 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.10 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index bf5b0b4..1dbc576 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 94.73 ± 0.22 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.47 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 119.33 ± 0.28 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.19 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index 7d4fe91..f8a3d29 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 93.27 ± 0.18 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.67 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 93.03 ± 0.12 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.00 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index 6afa9b4..2e935e0 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 159.89 ± 0.44 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.55 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 136.81 ± 0.43 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.52 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 543674d..fd97866 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.42 ± 0.33 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.66 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 148.95 ± 0.73 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index 426e6cc..b1887c5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 217.22 ± 0.49 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.18 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 216.84 ± 0.52 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.15 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 9a614f0..3d6be96 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 219.61 ± 0.55 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.21 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 218.68 ± 0.54 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.27 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 5471167..0dda835 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 212.60 ± 0.74 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.18 ± 0.03 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 223.39 ± 1.25 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 24.06 ± 0.03 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 9353954..a336437 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 224.85 ± 2.55 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.64 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 236.02 ± 2.60 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.51 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log deleted file mode 100644 index 5dfa73b..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.87 ± 0.23 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.86 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index d31d01b..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.65 ± 0.59 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index a89a06c..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xe6e7340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 4115027..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x400a9340) on address 0x7ef17b435000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log deleted file mode 100644 index b7bc1fd..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.53 ± 0.28 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.87 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log deleted file mode 100644 index d9d9b0b..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.22 ± 0.41 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.95 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log deleted file mode 100644 index b47fc74..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x22558310) on address 0x7f7830fad000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index d51b566..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.68 ± 0.22 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log index 3b36313..1d82208 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 123.24 ± 0.42 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.84 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 118.41 ± 0.20 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.75 ± 0.16 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 771d380..232c71a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.37 ± 0.24 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.17 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 128.51 ± 0.51 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.16 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log index a85e834..b1e22c6 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 151.03 ± 0.45 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.79 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 133.48 ± 0.45 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.11 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index a8f8332..e9db21c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 155.49 ± 0.74 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.18 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 143.55 ± 0.54 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.17 ± 0.06 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log index 591d402..dbef949 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 122.48 ± 0.34 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.86 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.76 ± 0.14 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.69 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log index f4aac77..a56945a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 130.06 ± 0.38 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.18 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 129.77 ± 0.12 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.14 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log index fb204a9..59c0892 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 150.67 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 140.68 ± 0.66 | | glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.84 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 773856c..94b1ac0 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 149.93 ± 0.58 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.18 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 134.52 ± 0.53 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.08 ± 0.21 | build: 4807e8f9 (6609) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index b0e83f6..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 91.95 ± 0.25 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.76 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 252ee22..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 70.00 ± 0.17 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.98 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 2210c41..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 134.22 ± 0.50 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.90 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 07268a4..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 159.75 ± 0.33 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index 96555fb..5d0df46 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 92.18 ± 0.04 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.92 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 97.09 ± 0.15 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index e3f3b88..599ea21 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 121.75 ± 0.32 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.97 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 69.91 ± 0.44 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.11 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index ebfc2f2..067a1fa 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 151.32 ± 0.45 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.90 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 128.74 ± 0.30 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.77 ± 0.20 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index c57d72f..3c4b809 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 161.10 ± 0.36 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 133.32 ± 0.82 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.20 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log index 3f154b9..3dfe042 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 92.20 ± 0.11 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.85 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 91.95 ± 0.23 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.80 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log index 068b285..bcb231a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 71.02 ± 0.16 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.96 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 125.81 ± 0.29 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.20 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 88bcc9c..820ccad 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 147.32 ± 0.43 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.91 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.12 ± 0.60 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.89 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 928c3ad..f1daee6 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 161.37 ± 0.36 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.48 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.10 ± 0.17 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log index a8b6d69..134e65b 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 264.50 ± 0.99 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.27 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 260.51 ± 1.03 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.26 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 740e1fd..a35925a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 267.86 ± 1.22 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.28 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 262.18 ± 1.19 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.30 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log index 9f963a8..f587e64 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 208.01 ± 0.73 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.49 ± 0.02 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 222.31 ± 0.71 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.43 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index c1968b9..7a14496 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 221.63 ± 1.26 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.71 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.21 ± 6.28 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.65 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index 07addc3..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,11 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -HW Exception by GPU node-1 (Agent handle: 0x284c3340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index cbc1a68..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.82 ± 0.06 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 635e8f4..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x7166340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8ab83e5..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x37f0e340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index ed7de0e..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 94.79 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index ab92419..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.62 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index b2280bc..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x12cee310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index d7451a9..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x367c310) on address 0x7fc07ad93000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log index 27c4fe3..87b4648 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.87 ± 0.18 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.00 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index ba04c4f..3c54596 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.31 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.65 ± 0.17 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index 2cf5854..eec5bf4 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.43 ± 0.23 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.76 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.82 ± 0.24 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index ca99086..296b7e3 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.81 ± 0.09 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.51 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log index c9ad273..5e0d92a 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 99.32 ± 0.17 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.85 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.76 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 5ab870f..b0dab6c 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.93 ± 0.11 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.47 ± 0.08 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log index 89f6c3b..b9a373c 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.99 ± 0.21 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.12 ± 0.10 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 83ddd35..6fb13dc 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.03 ± 0.23 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.23 ± 0.24 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 4f142bc..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.15 ± 0.16 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 7ec867a..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.79 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 102a571..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.89 ± 0.22 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 5d3aace..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.53 ± 0.17 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 896a9c7..871f10a 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.42 ± 0.12 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 92.92 ± 8.60 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index a3dbd8f..718f2ba 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.56 ± 0.04 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.05 ± 0.09 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 9ead8d1..46fe5df 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 92.02 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.96 ± 0.14 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 80ec16d..c447119 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.10 ± 0.17 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.51 ± 0.21 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log index fb7b0d0..7f147c8 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 95.12 ± 0.17 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 95.55 ± 0.07 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index 8390da3..985b743 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.16 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 104.25 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log index f0129d8..7f54c86 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.86 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.72 ± 0.13 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 23bf2f4..75949af 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 95.87 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.17 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log index 114cbeb..1cf5184 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 97.72 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 97.22 ± 0.38 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.81 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 4d303d4..8b37fdb 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 99.04 ± 0.31 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 98.46 ± 0.54 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log index dcad22c..1a4f0e1 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 78.94 ± 0.51 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 83.82 ± 1.56 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index d7b8226..a378333 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 80.90 ± 0.77 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 86.06 ± 1.83 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index 1a327e9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 265.76 ± 0.95 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index eec534a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 289.14 ± 1.57 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.64 ± 0.15 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 3fc5e97..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x24187340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index b5d2530..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3da9340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index ba3f44f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,11 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -HW Exception by GPU node-1 (Agent handle: 0x11bc3310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index 772e891..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.67 ± 0.91 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index b338079..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x8a0a310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index d72a6cb..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1ada6310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log index 0942b1d..694a8ca 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.88 ± 1.57 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.66 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.81 ± 10.73 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.68 ± 0.05 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 23ba9a1..525ed6d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.47 ± 1.18 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 282.95 ± 5.18 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.77 ± 0.06 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index 3ad139d..3052d56 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 277.79 ± 0.94 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.65 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 278.22 ± 1.12 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.67 ± 0.03 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 17338e8..49eed32 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.17 ± 1.61 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | - -build: 4807e8f9 (6609) +Memory access fault by GPU node-1 (Agent handle: 0x16ddf160) on address 0x7f64d1356000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log index 0fba6ba..857b288 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.97 ± 1.15 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.71 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 269.91 ± 1.51 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.50 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log index 7a9c31a..8195028 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.79 ± 2.33 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.84 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 288.79 ± 1.53 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.79 ± 0.05 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log index f465241..e6885ae 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 278.59 ± 1.22 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.43 ± 0.75 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.03 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 1ceb016..52bc536 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 296.61 ± 0.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | - -build: 4807e8f9 (6609) +HW Exception by GPU node-1 (Agent handle: 0x12e56b10) reason :GPU Hang +✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 334b616..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.44 ± 1.46 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.55 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 632535a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.67 ± 1.04 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 8fca923..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.88 ± 1.14 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 9bba0b6..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 284.81 ± 1.55 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.72 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log index 3e4a420..3aa3ab0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.13 ± 0.84 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.71 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.82 ± 1.43 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.66 ± 0.07 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 44047fc..139eda7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.92 ± 2.63 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 288.92 ± 3.51 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.81 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 52dda2f..45c12aa 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.23 ± 1.35 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.26 ± 1.54 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.66 ± 0.08 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index f01372d..cdcef38 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x13c5d180) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.69 ± 1.15 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.82 ± 0.00 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log index f79d354..60d4cb4 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x381db160) on address 0x7f72baf68000. Reason: Page not present or supervisor privilege. +HW Exception by GPU node-1 (Agent handle: 0x36543290) reason :GPU Hang ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log index d2e6f1c..7865e05 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x34902180) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.90 ± 1.98 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.78 ± 0.02 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log index ff5e1de..df2e258 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.52 ± 1.78 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.70 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 271.87 ± 1.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.69 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log index e29a4b0..26c6647 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 287.04 ± 1.92 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.71 ± 0.00 | - -build: f1fbffb5 (6486) +HW Exception by GPU node-1 (Agent handle: 0x292a92a0) reason :GPU Hang +✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log index e235c42..1c4d9fe 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 224.02 ± 2.86 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.98 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 224.42 ± 3.09 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.99 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log index 9e0f45f..7b878a2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 234.30 ± 1.10 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.75 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 224.57 ± 3.64 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.76 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log index eff084d..8d69d90 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 201.49 ± 2.22 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.77 ± 0.01 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 206.64 ± 2.56 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.81 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log index ff879bc..a07e37d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 202.49 ± 5.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.74 ± 0.00 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 212.38 ± 2.39 | +| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.76 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log deleted file mode 100644 index e0ffee4..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.44 ± 24.69 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.88 ± 0.05 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 1f9cd7e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.83 ± 1.59 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.06 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index a4e7e73..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3265f340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 2f00d23..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x33cad340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log deleted file mode 100644 index 052bfbb..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 274.49 ± 1.84 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.91 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log deleted file mode 100644 index 5571471..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.07 ± 2.73 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.06 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log deleted file mode 100644 index 27d8d9a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1ac74310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 6e6a4d0..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x390d2310) reason :GPU Hang -✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log index a90f13a..cb99f32 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 281.33 ± 2.60 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.89 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.00 ± 1.39 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.89 ± 0.03 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index e75d6ae..edb356d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 297.14 ± 1.58 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.00 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.68 ± 3.72 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.96 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log index 321f56b..b54f84b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 280.36 ± 0.42 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.88 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 260.53 ± 23.26 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.82 ± 0.11 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 541b6d2..18e96ab 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.12 ± 2.72 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.00 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 295.09 ± 2.26 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.98 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log index 9662d5c..5cc9682 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.89 ± 0.66 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.63 ± 1.32 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.87 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log index 29a7042..9d334f2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 297.68 ± 2.90 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.97 ± 0.09 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.51 ± 2.06 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.99 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log index 64d07c9..5a7c43c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.44 ± 3.25 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.90 ± 0.04 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.00 ± 2.10 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.90 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 906e40b..e30f07d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.04 ± 1.45 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.00 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.35 ± 11.21 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.94 ± 0.13 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 7928822..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.21 ± 1.93 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.85 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 1e1c530..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.69 ± 2.25 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.91 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 7b4105e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 267.51 ± 12.72 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.90 ± 0.04 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 546aac5..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 286.25 ± 4.29 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.90 ± 0.05 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log index 42ba624..2db2881 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 260.60 ± 10.80 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.82 ± 0.22 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 272.39 ± 2.15 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log index a58997b..82f48de 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.26 ± 3.75 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.92 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 296.04 ± 2.16 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.98 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index 2b05052..86c1b70 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 275.91 ± 1.81 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.91 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 266.07 ± 22.72 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index e5a0196..d94673e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 260.83 ± 5.18 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.82 ± 0.20 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 257.00 ± 4.08 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.76 ± 0.51 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log index 268c6ee..8882272 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.56 ± 3.76 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.88 ± 0.02 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 263.12 ± 18.31 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.89 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log index a877428..c9f896e 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x30007180) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 284.19 ± 24.64 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.89 ± 0.20 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log index b6e5310..eeccd01 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 278.61 ± 2.47 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.92 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.01 ± 1.64 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.85 ± 0.11 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log index 19dcf24..1f98864 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x231e9180) reason :GPU Hang +HW Exception by GPU node-1 (Agent handle: 0xe25e2a0) reason :GPU Hang ✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log index 5436987..b75fe91 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 343.36 ± 1.37 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 346.53 ± 1.71 | | llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.57 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log index 1755f19..8945f44 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 347.56 ± 1.15 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.42 ± 0.01 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 346.93 ± 1.50 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log index 0d356f0..eccb0f0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 244.52 ± 1.08 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.57 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 272.53 ± 1.82 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.58 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log index be8e1fa..3a955b7 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 253.13 ± 1.36 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.56 ± 0.00 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 280.38 ± 1.48 | +| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.58 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index fb8f81b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,11 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -HW Exception by GPU node-1 (Agent handle: 0x1f49a340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index ca1ff97..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 307.79 ± 3.48 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index a0f2615..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 138.51 ± 0.72 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 1e52153..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 142.41 ± 0.57 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.80 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index 1039808..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 282.50 ± 1.23 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index 720ab4a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.89 ± 1.35 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.82 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index 91a8494..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.98 ± 0.67 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 7b80e05..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 143.18 ± 0.45 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log index a4df56c..e7881a2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.19 ± 2.35 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.37 ± 1.53 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.81 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index b692d74..bf8c9d0 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 307.71 ± 1.77 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 18.00 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 305.77 ± 1.56 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index b358a77..28cf6b2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.96 ± 2.18 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | - -build: 4807e8f9 (6609) +Memory access fault by GPU node-1 (Agent handle: 0xcb3c160) on address 0x7fdd3957e000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 05bade8..857fd84 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 310.84 ± 1.35 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 18.01 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.78 ± 2.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log index b0ad559..e974bfc 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.26 ± 0.79 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.83 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.54 ± 1.17 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.59 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index ff01df1..c30ddd6 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 311.26 ± 1.06 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 310.82 ± 2.23 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log index 606cad7..d029a1f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 290.78 ± 1.38 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 295.23 ± 0.70 | | llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.81 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index b6018d7..a67e3ce 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 310.36 ± 1.62 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 18.00 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.09 ± 1.64 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 86b7922..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.86 ± 1.18 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.71 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 562f9a8..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.13 ± 2.14 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.77 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 5230084..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.66 ± 1.37 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 8db788a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.00 ± 1.51 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.82 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log index 7ce4256..538e156 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.38 ± 0.76 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 281.11 ± 2.07 | | llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 82d7419..2712b2f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.40 ± 1.77 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 305.36 ± 1.78 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index fe0f034..c353f2d 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.08 ± 2.96 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.82 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.75 ± 2.41 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 1ae0137..02766ec 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 297.71 ± 1.73 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 298.01 ± 1.45 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.96 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log index 2464692..392d382 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x4092b180) on address 0x7fe1ddb56000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134) +| model | size | params | backend | ngl | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 281.91 ± 2.33 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.79 ± 0.00 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index a5a4c8a..e512cda 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2b6cf180) on address 0x7f9694f56000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 306.77 ± 2.47 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.97 ± 0.00 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log index a9689cc..d7a9f39 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.17 ± 2.14 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.80 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 284.71 ± 1.24 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.78 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index b0472bd..875df42 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.96 ± 1.85 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.81 ± 0.00 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 299.68 ± 1.75 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.93 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log index 06fe45e..9b30c60 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 191.71 ± 1.02 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 21.03 ± 0.02 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 193.74 ± 0.96 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 21.05 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 9480722..d2d7b9c 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 193.39 ± 1.52 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.61 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 194.33 ± 1.56 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.64 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log index 71442c3..ab3e1cd 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 213.71 ± 2.99 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.87 ± 0.03 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 223.80 ± 2.70 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.91 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 07180ac..c152d77 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 217.08 ± 5.59 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.85 ± 0.01 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 228.13 ± 3.26 | +| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.88 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log deleted file mode 100644 index 74dae23..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 129.22 ± 0.43 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.25 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 3f8d6e9..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.48 ± 1.30 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 148c268..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0xd6b8340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 131dcc4..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 75.22 ± 0.16 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.29 ± 0.03 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log deleted file mode 100644 index c9464c6..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 129.64 ± 0.35 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.24 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log deleted file mode 100644 index f654098..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.82 ± 0.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log deleted file mode 100644 index 64085e5..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 74.17 ± 0.11 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.27 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 78154a2..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 77.91 ± 0.23 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.31 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log index f7ed4a1..fcbf28b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.57 ± 0.66 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.74 ± 0.40 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.54 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index b462385..7a81d3e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.38 ± 0.73 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.90 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.30 ± 0.82 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.90 ± 0.12 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log index f807cdd..f5aeb10 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.69 ± 1.05 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.45 ± 0.46 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.04 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index ee3cd12..dd67609 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.45 ± 0.41 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.97 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.69 ± 0.47 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.95 ± 0.09 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log index 5491561..b011d5b 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.50 ± 0.67 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.55 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.25 ± 0.49 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.39 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 2cc99e2..8b89d24 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.31 ± 0.58 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.16 ± 0.77 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.87 ± 0.05 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log index 06bee5e..7584081 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.54 ± 0.74 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.54 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.50 ± 0.69 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.55 ± 0.02 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index c8241e6..e329c2d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.26 ± 0.29 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.92 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.26 ± 0.97 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.87 ± 0.08 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 304856f..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.98 ± 0.86 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.14 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 4b9aafa..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.08 ± 0.51 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.29 ± 0.04 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index fdc7289..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 130.87 ± 0.83 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.25 ± 0.05 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 011ab77..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 137.23 ± 0.55 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.32 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log index bac74c4..f29c25e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.60 ± 0.56 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.33 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 132.68 ± 0.50 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.62 ± 0.08 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 3ce3c01..0bdbcf4 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 143.76 ± 0.58 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.36 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.60 ± 0.56 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.90 ± 0.09 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index f3e8a66..5b1eea3 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.24 ± 0.57 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.03 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.60 ± 0.66 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.59 ± 0.06 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index baf0257..200b3a6 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.84 ± 0.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.37 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.15 ± 1.20 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.93 ± 0.05 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log index 818e7e0..ae0c02f 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.45 ± 0.50 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.39 ± 0.54 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.62 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log index 7cfb635..53e3d92 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.84 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.36 ± 0.02 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 146.88 ± 0.69 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.00 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log index 5c4c044..9d4c6ed 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.40 ± 0.48 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.32 ± 0.04 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.05 ± 0.64 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 14.64 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index c4be1e2..bf7dc06 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x293f4180) on address 0x7f5ee4f70000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.44 ± 0.78 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.99 ± 0.00 | + +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log index 9c8e307..ce992e9 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 131.33 ± 1.43 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.27 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 136.12 ± 1.49 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.32 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 0d15b29..538d4bf 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 133.32 ± 1.63 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.12 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 135.43 ± 4.81 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.14 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log index 4512dd6..d4abf61 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 115.77 ± 1.42 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.75 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 120.72 ± 3.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.74 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index 1fdb504..2c9d496 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 121.80 ± 1.81 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.10 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 125.48 ± 4.53 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.02 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index 1e65c4b..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 426.32 ± 6.04 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.00 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index f0325ad..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 479.22 ± 4.77 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.91 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 3f88455..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 158.56 ± 4.20 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.03 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index f0a6412..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 165.57 ± 2.56 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.89 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index cff659b..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 432.19 ± 6.24 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.02 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index 77fe8ec..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 477.24 ± 5.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.88 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index 501ffea..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 162.44 ± 4.25 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.05 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 1c49a1a..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 160.17 ± 3.26 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.91 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log index 0769aaf..6732114 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 451.60 ± 1.80 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 436.29 ± 4.51 | | qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 0258221..5fb9bdf 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 482.09 ± 5.55 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.77 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 480.95 ± 4.32 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.82 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index 80c30d1..61f0eec 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 345.46 ± 3.07 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 346.51 ± 4.73 | | qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.49 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 0825c92..79b1058 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 354.93 ± 5.65 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.80 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 356.62 ± 6.87 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.85 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log index b6bb252..1b2bbe2 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 448.97 ± 7.97 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.57 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.70 ± 6.51 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.23 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index c4e8076..c0c21f6 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 489.49 ± 3.92 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.78 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 485.65 ± 7.08 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.71 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log index 993b363..5d594f4 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 343.78 ± 1.91 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.48 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 343.63 ± 2.43 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.52 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 26e3a84..63fd57e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 363.09 ± 8.05 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.75 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 364.38 ± 4.78 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.83 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 11cfce9..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.53 ± 2.47 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.69 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index cdcd330..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 476.36 ± 3.91 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.93 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 85da425..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 330.47 ± 5.12 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.09 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index b9f0191..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 343.19 ± 4.41 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.90 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log index 02d9e91..c7d6636 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 431.59 ± 5.03 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.06 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 435.87 ± 4.36 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.56 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 7c575f0..ae8f244 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 476.09 ± 5.36 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.93 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 477.05 ± 5.97 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.84 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index ca2d22c..b5657e7 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 332.32 ± 3.60 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.11 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 338.71 ± 3.32 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.51 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 368ff16..32dc9c5 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 344.55 ± 3.84 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.92 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 347.28 ± 5.57 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.86 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log index 579c663..5e01b02 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 431.29 ± 3.17 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.10 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 439.13 ± 4.42 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log index aadc637..8f60a35 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 475.35 ± 3.41 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.94 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 482.81 ± 7.55 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.85 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log index 9061b2e..3cb901f 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 329.24 ± 2.98 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.06 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 334.68 ± 2.49 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 25.54 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index ae585b8..788a85a 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 348.53 ± 5.60 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.92 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 346.28 ± 2.02 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 25.80 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log index 9733b38..c1d96cf 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 139.51 ± 0.90 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.31 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 216.27 ± 0.39 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 10.07 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 89ebf52..c030b4b 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 140.62 ± 1.53 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.26 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 216.46 ± 0.31 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 10.00 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log index f88d6ea..6fe4ca5 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 150.84 ± 1.38 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.24 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 163.35 ± 0.20 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 9.24 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 37c3d79..a9da670 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 156.53 ± 2.33 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.29 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 166.05 ± 0.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.29 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log deleted file mode 100644 index b163e9b..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 538.66 ± 2.16 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.01 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index d8e45a2..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 620.78 ± 3.75 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.74 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 4b2bee2..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 389.41 ± 1.99 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.13 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 0cc6bbd..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 413.64 ± 1.55 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.76 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log deleted file mode 100644 index 26ad0c6..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 543.05 ± 4.56 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.14 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log deleted file mode 100644 index c9f4948..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 624.71 ± 4.66 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.74 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log deleted file mode 100644 index ee0b076..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.73 ± 2.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.30 ± 0.02 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 6fe28d1..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 415.19 ± 1.76 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.59 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log index 185f173..6c61f6d 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 577.98 ± 6.34 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.37 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 562.46 ± 5.25 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.16 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log index fcaa877..d40f50f 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 623.53 ± 3.70 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.76 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 626.72 ± 6.27 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.04 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log index fe00584..cc78e33 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 582.34 ± 4.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.34 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 589.82 ± 5.37 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.38 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 14fa7cd..257a045 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 622.32 ± 5.83 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.82 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 620.07 ± 8.69 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.88 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log index feb7eb4..98cada8 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 582.99 ± 4.97 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.33 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 556.95 ± 4.88 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 54.93 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index f79c6d2..e913334 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.12 ± 3.63 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.67 ± 5.74 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.81 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log index 4fb2d36..0b85b91 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 582.14 ± 4.21 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.39 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 582.53 ± 3.05 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.41 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index 350eefc..48c1c1e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.63 ± 4.35 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.77 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 629.19 ± 4.25 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.94 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index a014b00..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 552.48 ± 1.39 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.22 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 2e173b9..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.51 ± 8.44 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.82 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index be7305a..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 576.87 ± 7.86 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 9741f2c..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 609.51 ± 4.26 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.77 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log index 9bb2bf0..ecf9b98 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 555.30 ± 3.11 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.34 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 557.13 ± 5.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.60 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log index f4f3cce..d5596ea 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.71 ± 2.77 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.77 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 623.10 ± 4.22 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.95 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log index d707246..6caa371 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 575.05 ± 4.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.33 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 586.15 ± 3.69 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 1856861..d7e04e5 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 618.89 ± 4.53 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.69 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 619.92 ± 6.18 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.09 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log index ac9d02d..f8fc311 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 549.65 ± 6.16 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 553.07 ± 3.87 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log index 4848dd8..b946331 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 621.80 ± 7.09 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.78 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 632.51 ± 3.87 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.97 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log index 37ba478..bf0ad55 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 575.05 ± 3.02 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 53.42 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 588.10 ± 4.51 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 55.49 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log index 75f7de4..041dc76 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 614.05 ± 4.83 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 52.83 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 631.26 ± 5.65 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 56.97 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log index e1f8a5a..c9faa6f 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 1027.23 ± 5.64 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 63.42 ± 0.03 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 1053.02 ± 7.31 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 63.84 ± 0.04 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 460a956..8d5b733 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1005.86 ± 4.35 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 59.12 ± 0.04 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1020.41 ± 5.76 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 59.42 ± 0.04 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log index e4e2810..fbfb810 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 764.63 ± 3.75 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 64.77 ± 0.10 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 823.60 ± 3.91 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 64.74 ± 0.13 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index b854ab4..13a53e9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 801.22 ± 4.04 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.44 ± 0.12 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 857.47 ± 4.38 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 63.41 ± 0.08 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log deleted file mode 100644 index 433401a..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 726.41 ± 1.42 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 1e7652b..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 822.38 ± 0.84 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 3242eea..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 219.78 ± 3.65 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8b27e59..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 227.29 ± 2.29 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log deleted file mode 100644 index 3cdf4e3..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 703.97 ± 0.49 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log deleted file mode 100644 index 5d82b39..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 803.68 ± 0.98 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log deleted file mode 100644 index 2121415..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.73 ± 0.32 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 299085e..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 220.75 ± 2.43 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.86 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log index 17398bd..4a51b81 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 754.71 ± 0.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 747.69 ± 1.06 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log index 1ce720a..b7f2bb0 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 803.95 ± 0.73 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.07 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 809.20 ± 0.75 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log index 2b06f55..6ed8490 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 768.26 ± 1.35 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 781.56 ± 1.57 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index df3734a..e28a2d0 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 814.89 ± 0.73 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.08 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 819.61 ± 0.83 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log index 672a797..2cf28d1 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.85 ± 1.59 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 720.89 ± 0.70 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 6023ebe..888ad2c 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 814.18 ± 1.01 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 815.58 ± 1.47 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.08 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log index 12ac183..a1c9d13 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 769.51 ± 0.90 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 772.99 ± 2.45 | | gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index 98f7cdf..e3c4756 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 824.93 ± 0.75 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.08 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 824.58 ± 1.69 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.10 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index b9c2ba9..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 714.52 ± 1.47 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index dd2b67e..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 810.36 ± 1.88 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index b5de28a..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.18 ± 0.86 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.14 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 3a710b2..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 797.91 ± 0.87 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log index d2b180e..1ca88e0 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 738.56 ± 1.66 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.90 ± 0.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index 7e8c499..c4a6a7a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 810.24 ± 2.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.89 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 799.43 ± 1.14 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.11 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log index 6871a37..ed2ad92 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.87 ± 1.69 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 762.49 ± 1.04 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index a0de968..2dfa40d 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 798.06 ± 1.45 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 788.46 ± 1.56 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.13 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log index 48ce734..7685df5 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 738.50 ± 1.56 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.16 ± 0.84 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.16 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log index a0dd7bf..bb91b3b 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 801.53 ± 1.48 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.88 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 816.86 ± 0.80 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.13 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log index 352a972..f1f644a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.81 ± 0.96 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 763.42 ± 1.37 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log index 3bdf7bd..d8b9701 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 791.04 ± 2.55 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.87 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 806.70 ± 1.39 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.12 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log index 431a37b..4af2fdc 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 679.86 ± 1.33 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.60 ± 0.03 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 668.85 ± 1.34 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.64 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index bbdf2e7..2cbc33d 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 659.67 ± 0.72 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.50 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 648.34 ± 0.61 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.52 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log index 2c0ca73..cd18136 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 504.31 ± 3.20 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.14 ± 0.01 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 541.39 ± 3.33 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index f084df4..db5ed22 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 501.78 ± 2.80 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.01 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 532.11 ± 3.15 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.98 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log deleted file mode 100644 index 0199908..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.72 ± 0.86 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 86b7295..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.46 ± 1.37 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index eb32966..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 84.71 ± 8.12 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index cc49148..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 84.12 ± 9.82 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log deleted file mode 100644 index 8dc76ac..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 408.40 ± 1.09 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log deleted file mode 100644 index f94a57d..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.49 ± 1.46 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log deleted file mode 100644 index 4fb6887..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 84.93 ± 8.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index ee44cb5..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 83.22 ± 10.78 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log index d4000d3..09c19a8 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 425.33 ± 1.61 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.11 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 420.14 ± 0.69 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index a3a80d9..1a4d459 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 470.80 ± 1.97 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.87 ± 0.84 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log index 7dcd8b2..a9a2107 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 469.59 ± 0.76 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 477.22 ± 0.76 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index d193232..033670c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 524.38 ± 0.70 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 524.62 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log index 42dd307..21f076f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 418.14 ± 0.79 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.24 ± 0.72 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index b0b764e..5e53f53 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.28 ± 1.24 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 471.95 ± 1.68 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log index 9a91629..9e006da 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 471.56 ± 0.60 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 471.17 ± 0.97 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index d91b6ce..95c2c68 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 530.58 ± 0.66 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 529.49 ± 1.20 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 3332c2a..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 412.86 ± 1.22 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index a8e7f5a..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 465.55 ± 1.95 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index df6e790..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 453.66 ± 0.77 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 89d3127..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 498.77 ± 0.53 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log index 282d7f2..98c4607 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 419.05 ± 0.86 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 421.40 ± 0.43 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 2c4f703..b0606d8 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.36 ± 1.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 464.58 ± 0.58 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log index 4cda7ad..98a3a10 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 451.57 ± 0.41 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 458.08 ± 0.81 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index c745c9f..b2bf9fe 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.87 ± 0.47 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.11 ± 0.58 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log index 9f5c43c..76634c0 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 422.00 ± 0.56 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 413.95 ± 0.60 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.00 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index e43f6df..23fed27 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.45 ± 1.83 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 469.08 ± 0.27 | | gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log index 2253da3..2ff4d14 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 453.24 ± 0.64 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 457.65 ± 0.59 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.10 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 98a164a..4664ca1 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 502.26 ± 0.81 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.10 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 503.26 ± 0.79 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.11 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log index f20e9bc..cd92808 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log @@ -3,6 +3,6 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' ✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 2578dff..3b0ee1d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -3,6 +3,6 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | ggml_vulkan: Device memory allocation of size 2819260416 failed. -ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory +ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' ✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log index 19c65b3..fdd7c39 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 134.52 ± 0.99 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.92 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 103.58 ± 1.09 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 3.93 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index a1f09eb..26c11fc 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 138.59 ± 1.23 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.33 ± 1.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.91 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log deleted file mode 100644 index 8013724..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1886.62 ± 6.81 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.36 ± 5.10 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 725652c..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2282.08 ± 7.86 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.40 ± 0.03 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 3fa4608..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 713.12 ± 38.25 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 67.01 ± 5.06 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 978ff37..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 676.80 ± 75.42 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 59.10 ± 2.57 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log deleted file mode 100644 index 654df84..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1857.54 ± 7.32 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.34 ± 7.91 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log deleted file mode 100644 index 1842c2a..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2214.91 ± 7.20 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.36 ± 0.05 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log deleted file mode 100644 index 83e72da..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 656.82 ± 60.97 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 63.81 ± 3.45 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 60ffb7f..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 663.36 ± 79.77 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 58.63 ± 2.61 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log index 092f913..1e894d0 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2110.44 ± 6.13 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.31 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1935.04 ± 3.89 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.17 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log index 7aeae92..3e8bfa0 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2261.02 ± 8.46 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.07 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2278.78 ± 8.79 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 76.94 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log index ffa9c72..fd554fe 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2040.30 ± 9.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.33 ± 0.05 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2072.56 ± 8.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.03 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log index de860ae..15697a2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2143.83 ± 3.82 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.19 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2158.84 ± 4.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.11 ± 0.04 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log index b1c6f95..712d57c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2099.80 ± 6.34 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.43 ± 0.05 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1896.32 ± 6.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.32 ± 0.08 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index 6dd1c51..d57318f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2262.00 ± 6.48 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.04 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2261.52 ± 12.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.18 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log index a41b287..1fabbb8 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2038.14 ± 6.72 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.41 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2038.57 ± 4.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.42 ± 0.02 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 11e13dd..02ce6c2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2141.85 ± 6.83 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.14 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2127.98 ± 4.53 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.17 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 0a762e2..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1830.34 ± 15.12 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 60.04 ± 4.39 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index b4e8cfb..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2178.17 ± 91.83 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 55.78 ± 3.20 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 9113e30..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2016.93 ± 4.81 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 58.29 ± 3.79 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 8260f58..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2094.58 ± 12.74 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 66.23 ± 8.72 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log index d25fd4e..6382308 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1865.95 ± 7.12 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.56 ± 0.09 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1858.93 ± 11.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.18 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log index fe4b290..6cc66c6 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2252.60 ± 11.74 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.57 ± 0.05 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2249.97 ± 8.38 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.23 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log index 5483c20..cf36853 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2011.51 ± 6.91 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.65 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2045.87 ± 7.65 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.18 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log index e572ab1..1615a42 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2111.65 ± 7.03 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 67.62 ± 4.71 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2110.98 ± 11.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.03 ± 0.03 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log index 362a77a..cceaf6f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1869.83 ± 5.67 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.48 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1865.48 ± 5.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.26 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log index 39df255..21750b0 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2229.43 ± 7.33 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.58 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2265.97 ± 12.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.23 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log index 53225e6..6d11543 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2014.48 ± 4.39 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.61 ± 0.04 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2034.18 ± 7.80 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 79.20 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log index d2c4064..ad23019 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2064.91 ± 7.11 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.03 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2104.47 ± 6.90 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 77.16 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log index 65403f9..38633bd 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1288.81 ± 206.13 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 86.61 ± 1.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1417.85 ± 229.30 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.91 ± 1.98 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index 14f5f51..c23a128 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1149.64 ± 181.24 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.50 ± 1.74 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1193.42 ± 154.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 82.87 ± 1.37 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log index 799317c..e37eefd 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 967.51 ± 123.30 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 86.74 ± 1.45 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1256.94 ± 209.42 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 92.19 ± 0.42 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index 513be95..fd50648 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 991.94 ± 120.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.61 ± 0.84 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1150.84 ± 174.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.89 ± 0.11 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log deleted file mode 100644 index 9e4b08e..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 629.19 ± 3.98 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.79 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 5fe051d..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.88 ± 9.39 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.14 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 8a0d184..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 364.08 ± 1.11 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.81 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index aaf527f..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 400.84 ± 0.84 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.17 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log deleted file mode 100644 index 41af850..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 627.57 ± 4.14 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.78 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log deleted file mode 100644 index f097e15..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 783.40 ± 1.22 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.15 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log deleted file mode 100644 index 7346088..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x37f5d310) reason :GPU Hang -✖ ! [rocm6_4_3] gpt-oss-120b-F16 __hblt0 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 8eda0fb..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 402.16 ± 1.31 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.16 ± 0.02 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log index 6f18e5c..ed29934 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 683.95 ± 7.54 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.82 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 667.15 ± 5.65 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.77 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log index e759809..3496a41 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 783.37 ± 6.29 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.06 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 786.49 ± 4.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.16 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log index f285cae..ed3709f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 689.85 ± 4.60 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.84 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 700.13 ± 3.54 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.79 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log index aca0d6e..6e64f96 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 789.94 ± 5.16 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.17 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 783.50 ± 5.44 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.12 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log index 0e3533b..e08e51b 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 682.09 ± 3.61 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.89 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.45 ± 2.11 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 32.90 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log index 2f2e390..6b8906f 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 790.76 ± 6.72 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.06 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 790.90 ± 4.05 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.98 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log index a70aa6f..4bc3cf3 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 688.37 ± 4.43 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.74 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 687.78 ± 5.65 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log index 5a2445a..9889db6 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 777.75 ± 25.64 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.12 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 792.00 ± 9.77 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.14 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index c69bc04..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.02 ± 4.28 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.00 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 816395b..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 778.25 ± 3.40 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.25 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 3948df0..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 688.70 ± 7.72 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.83 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 9ccc2fb..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.39 ± 6.28 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.20 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log index f7645fb..f06c14e 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.89 ± 3.96 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.89 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 660.37 ± 3.05 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log index c286eb1..9eab1bb 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.19 ± 5.64 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.18 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.55 ± 4.47 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.07 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log index ab867d3..6d6b988 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 691.27 ± 4.90 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.83 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 698.86 ± 6.39 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log index 07e6796..3454955 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 772.44 ± 6.68 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.24 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 777.48 ± 7.78 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.14 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log index 34bd11a..e1cd695 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.09 ± 7.25 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.86 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 666.29 ± 5.04 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.80 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log index 384d751..18b3f18 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 779.77 ± 4.78 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.28 ± 0.00 | - -build: f1fbffb5 (6486) +HW Exception by GPU node-1 (Agent handle: 0x6990260) reason :GPU Hang +✖ ! [rocm7_rc] gpt-oss-120b-F16 __fa1 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log index 3cc22b3..d6914c0 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 683.90 ± 5.36 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.77 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 702.07 ± 4.76 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.78 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log index 1f2d84e..e3e2103 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 777.37 ± 4.77 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 34.17 ± 0.00 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 760.87 ± 22.70 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 35.07 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log index d68fc95..6027dce 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 626.37 ± 2.34 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 35.23 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 627.11 ± 1.45 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 35.32 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log index 10c4fa1..bf8a3fa 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 723.80 ± 2.49 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.59 ± 0.01 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 719.39 ± 2.63 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.71 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log index 80e1e5e..41e503c 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 401.61 ± 1.76 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.86 ± 0.03 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 433.14 ± 1.74 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.99 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log index 2c9a045..bc7473a 100644 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 444.61 ± 1.65 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.84 ± 0.02 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 481.71 ± 2.11 | +| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.46 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log deleted file mode 100644 index 3b2c5d3..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 612.55 ± 6.58 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.08 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 78f5bac..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 766.08 ± 2.67 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.93 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 6d219e6..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 362.01 ± 1.06 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.04 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 0808d48..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x32c91340) reason :GPU Hang -✖ ! [rocm6_4_3-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log deleted file mode 100644 index 7506230..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 614.68 ± 3.32 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.04 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log deleted file mode 100644 index 8093c5f..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 768.28 ± 5.81 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.86 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log deleted file mode 100644 index 636f327..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 362.06 ± 1.45 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.11 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 957a4d0..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 397.06 ± 1.41 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.01 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log index 58ee406..750266d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 668.07 ± 3.99 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.22 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 653.32 ± 7.07 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.09 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 7b6a6b6..8561f76 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 767.63 ± 5.37 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.72 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 767.28 ± 2.81 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.63 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log index 2509d50..a3f8159 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 685.61 ± 4.60 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.15 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 703.72 ± 4.21 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.05 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index 85d58d8..78d5aa9 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 785.43 ± 4.63 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.65 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 773.91 ± 4.34 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.61 ± 0.03 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log index b26e84b..251f8d5 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 664.62 ± 3.53 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.11 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 644.73 ± 4.21 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.15 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index e27bc1a..48364b4 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 773.25 ± 6.50 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.69 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 766.09 ± 8.12 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.51 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log index b4438a1..d7b71ca 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 686.92 ± 5.29 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.15 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 660.34 ± 48.62 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.72 ± 0.39 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 380679c..45683fa 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 781.60 ± 6.15 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.76 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 780.39 ± 3.58 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.70 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 85ade63..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 639.82 ± 2.41 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 46.29 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 28c282b..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.98 ± 1.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.00 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index ccbcd7a..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 683.94 ± 2.89 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.29 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 8b573b2..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 778.15 ± 4.46 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.04 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log index b346209..8222179 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 641.91 ± 7.56 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.20 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 651.94 ± 3.45 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 4300bb0..6fe114d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.17 ± 4.24 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.05 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 756.58 ± 4.67 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.62 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log index d80b648..303ed9d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 681.37 ± 3.54 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.19 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 700.53 ± 1.99 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 428b0ae..224167a 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 770.60 ± 3.18 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.09 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 772.03 ± 9.61 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.64 ± 0.04 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log index b581add..140d013 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 641.87 ± 3.27 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.17 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.20 ± 4.73 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.07 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log index 8aaf148..4bae853 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 757.39 ± 3.80 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.04 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 755.62 ± 4.68 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.70 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log index cb85c29..4a49ba5 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 688.94 ± 3.90 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.06 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 698.26 ± 2.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 47.05 ± 0.12 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log index 53cccf3..5a90e19 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 769.31 ± 5.48 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 46.07 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 773.20 ± 7.58 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 47.65 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log index fc9cedb..f988571 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 682.60 ± 3.30 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 51.41 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 681.25 ± 3.69 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 51.65 ± 0.03 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 82270b1..5bf0d19 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 790.49 ± 4.84 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.15 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 788.46 ± 4.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.32 ± 0.03 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log index f38971f..a3c335c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 426.15 ± 2.65 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 52.79 ± 0.16 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 464.26 ± 2.62 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 52.85 ± 0.16 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index d3f726b..7182fbe 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 478.69 ± 3.26 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.75 ± 0.06 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 526.13 ± 3.20 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.90 ± 0.05 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log deleted file mode 100644 index 802b5a6..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1140.40 ± 8.72 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.24 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 5aa7e87..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.30 ± 22.33 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.96 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 5cd945e..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 327.64 ± 1.89 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.19 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 8086999..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 342.77 ± 3.39 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.00 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log deleted file mode 100644 index 64e0253..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -hipBLASLt error: Heuristic Fetch Failed! -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1147.38 ± 6.40 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.24 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log deleted file mode 100644 index 2e70988..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.59 ± 26.99 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.00 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log deleted file mode 100644 index 4437a50..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 326.33 ± 6.68 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.20 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 3ae063a..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 344.41 ± 7.32 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.96 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log index 50331f8..6df4ffa 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1253.42 ± 6.47 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.29 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1230.17 ± 12.16 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.22 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log index b0a6f2f..09ce36a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1502.41 ± 9.99 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1493.11 ± 16.19 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.30 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log index fd5dc3d..94200e9 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1234.38 ± 12.52 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.25 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1274.89 ± 11.66 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log index 5a05042..d7ccdc5 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1463.75 ± 8.49 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.34 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1460.62 ± 17.09 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.32 ± 0.00 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log index 007f05a..1c7b7d5 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1258.74 ± 12.44 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.27 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1163.88 ± 56.10 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 25.78 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log index c19f17f..705807c 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1513.34 ± 10.79 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.43 ± 11.78 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.36 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log index 41dede3..c3de17a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1235.02 ± 7.10 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1238.64 ± 11.98 | | gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.26 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log index b6c968d..2885396 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1475.65 ± 12.28 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.32 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.62 ± 19.37 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.36 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 2e92934..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1202.41 ± 13.79 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.03 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index e6da7d3..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1484.60 ± 5.26 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 7d9f2db..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1225.63 ± 9.42 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.25 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index d23467b..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1421.82 ± 12.16 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.95 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log index 8300d8a..beec68f 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1209.21 ± 16.57 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.23 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.88 ± 18.41 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log index 9cb1cb9..2965353 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1489.00 ± 6.12 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1470.86 ± 14.39 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.29 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log index 9bb131b..aeb83b4 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1218.32 ± 13.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.21 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1252.31 ± 14.38 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log index 2210bb8..54fdc4a 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1424.60 ± 8.06 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1421.53 ± 7.06 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.32 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log index 1aa5162..f3a8e8e 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1198.99 ± 21.23 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.25 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1228.62 ± 4.47 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log index dd419ad..6bafac8 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.46 ± 15.83 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.98 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1492.83 ± 17.46 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.29 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log index 9a0a4d9..72962bc 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1224.83 ± 11.58 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.20 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1263.37 ± 8.50 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.18 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log index 96a9cd4..f574c02 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1446.22 ± 17.28 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.99 ± 0.00 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1426.10 ± 25.91 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.00 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log index 8e3da78..ec1d4a0 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 469.06 ± 6.82 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 15.25 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 566.88 ± 3.31 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 18.39 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log index efee579..f7a30a9 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 514.17 ± 6.35 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 15.13 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 609.37 ± 2.58 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 18.25 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log index 7b2fe16..9f8373e 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 537.62 ± 2.10 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 14.85 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 423.31 ± 2.25 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 16.82 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log index 830fbc3..60f2dbb 100644 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 583.32 ± 3.38 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 14.86 ± 0.01 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 451.11 ± 2.96 | +| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 16.83 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log deleted file mode 100644 index 65b10c2..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1164.39 ± 11.24 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 1edd08e..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1531.44 ± 9.83 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.78 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index 1f991a5..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 590.66 ± 1.42 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 084ea0a..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 653.80 ± 1.17 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.72 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log deleted file mode 100644 index aae04b7..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log +++ /dev/null @@ -1,15 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -rocBLAS error: No hipBLASLt solution found -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set. - -rocBLAS warning: hipBlasLT failed, falling back to tensile. -This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1160.12 ± 12.72 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.19 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log deleted file mode 100644 index eb25e8e..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1539.79 ± 14.33 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.81 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log deleted file mode 100644 index cee8253..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 591.28 ± 2.68 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.35 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 7dbc6fc..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 655.10 ± 1.75 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.85 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log index b8cffdf..39c5bd8 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1276.57 ± 15.26 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.47 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1247.40 ± 5.38 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.13 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log index 7e59410..0898740 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1520.24 ± 18.05 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.08 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1529.26 ± 3.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.05 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log index 5098e0d..edbd049 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1335.36 ± 7.22 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.28 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1375.72 ± 12.99 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.16 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log index 8ee2cf2..9628548 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1575.76 ± 15.77 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.18 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1585.34 ± 7.29 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.08 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log index f71c809..5d3ee78 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4.log @@ -9,7 +9,7 @@ rocBLAS warning: hipBlasLT failed, falling back to tensile. This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set. | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1270.02 ± 3.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.37 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.54 ± 7.38 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.22 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index e53da19..ff4cb9d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1533.65 ± 17.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.13 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1534.52 ± 6.57 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.16 ± 0.02 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log index 49fe2c4..fa5f5cf 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1337.89 ± 14.39 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.39 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1329.52 ± 7.97 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.20 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index a972365..fdd5113 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1587.21 ± 12.01 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.25 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1601.11 ± 22.80 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 68.21 ± 0.00 | build: 4807e8f9 (6609) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index a52d858..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1222.12 ± 10.04 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.34 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 5070ee2..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1515.09 ± 6.22 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.75 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 598c877..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1335.14 ± 17.02 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.33 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 8fe24ad..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1562.66 ± 9.76 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.84 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log index f28a63e..d629bad 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1215.59 ± 8.93 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.39 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1239.97 ± 8.69 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.10 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log index c75c722..cdc2616 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1521.41 ± 10.84 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.89 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.28 ± 15.62 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.98 ± 0.03 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log index fdcb0a4..005bec1 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1334.89 ± 9.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.43 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1381.33 ± 11.13 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.13 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log index f407d2f..bd12b56 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1567.58 ± 12.62 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.78 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1563.47 ± 11.28 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.91 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log index 0607b26..e7e4a3c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1219.34 ± 5.57 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.37 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1239.41 ± 5.44 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.10 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log index 120f9df..f709e2d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1530.70 ± 9.71 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.84 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1508.59 ± 7.75 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.92 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log index 079a2c7..59c8dd1 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1331.00 ± 21.19 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.41 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1368.12 ± 12.11 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 67.09 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log index 1d5c6f3..0cc8bcd 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1575.63 ± 16.60 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 65.76 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1566.75 ± 13.55 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 67.99 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log index 3df5436..089ce35 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1498.39 ± 12.53 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.08 ± 0.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1515.08 ± 10.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.59 ± 0.07 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index 51abfd5..ca541fd 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1914.72 ± 22.77 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.57 ± 0.12 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1908.57 ± 17.12 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.91 ± 0.04 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log index 460e04d..ca19264 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1002.66 ± 7.71 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.77 ± 0.18 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1097.23 ± 7.32 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 74.95 ± 0.40 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index 2a7699d..f3b5d7c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1204.49 ± 13.52 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 74.94 ± 0.14 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1340.77 ± 10.85 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.19 ± 0.11 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log deleted file mode 100644 index a4d3dc7..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.46 ± 1.57 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.02 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log deleted file mode 100644 index 2b466fa..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1100.15 ± 1.95 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log deleted file mode 100644 index d2b3dfa..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.31 ± 0.71 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5c7f9e5..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 366.05 ± 1.98 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.32 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log deleted file mode 100644 index 638eede..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.46 ± 2.10 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log deleted file mode 100644 index 6d11aa9..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1105.32 ± 2.28 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.24 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log deleted file mode 100644 index acf5f29..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 348.63 ± 0.64 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.81 ± 0.00 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log deleted file mode 100644 index 6b8a5cd..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 368.28 ± 0.81 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.35 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log index dde3de4..5919eae 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.59 ± 0.72 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.85 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 985.32 ± 1.37 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.21 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log index 2e9c7a7..6ca70dd 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1098.00 ± 4.05 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.40 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1101.41 ± 1.79 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.92 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log index 51e1090..4258f59 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 899.84 ± 2.29 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.81 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 910.75 ± 2.97 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.19 ± 0.02 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log index e7d62d8..fcfcec4 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1005.78 ± 1.42 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.37 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1009.78 ± 2.26 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.91 ± 0.01 | -build: 4807e8f9 (6609) +build: 11f0af55 (6736) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log index 013bd5b..7649f89 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 979.86 ± 1.66 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.87 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 980.87 ± 2.46 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.86 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 16d2791..1c195f9 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1117.04 ± 3.47 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.38 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1113.80 ± 2.42 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.40 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log index c0db236..e0c43a5 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 895.65 ± 0.66 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.89 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 896.70 ± 0.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.87 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index 81ee700..4e33479 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1020.22 ± 1.63 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.36 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1017.14 ± 1.96 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.40 ± 0.01 | build: 4807e8f9 (6609) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log deleted file mode 100644 index 4db56a5..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 977.63 ± 2.98 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.91 ± 0.02 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log deleted file mode 100644 index 0fa5959..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1097.55 ± 1.49 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.33 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log deleted file mode 100644 index 5636ade..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 860.30 ± 0.57 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.89 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log deleted file mode 100644 index 3b3feea..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 953.79 ± 3.60 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.35 ± 0.01 | - -build: f1fbffb5 (6486) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log index 08e2ece..cdea446 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 984.61 ± 2.65 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.94 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 990.88 ± 3.03 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.20 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log index be2842e..ddbc53b 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1095.50 ± 2.69 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.34 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1092.96 ± 3.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.81 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log index 98855b5..43f68cc 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 859.46 ± 1.91 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 875.30 ± 2.50 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log index 847066a..49195bc 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 952.18 ± 1.68 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.32 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 956.75 ± 3.28 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.98 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log index d564f8b..35c66af 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 980.24 ± 1.40 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.90 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 987.11 ± 2.95 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log index e714e54..078fa87 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1100.05 ± 4.01 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1107.83 ± 1.63 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.98 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log index bc6ac1c..44169fc 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 860.23 ± 0.94 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 49.92 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | pp512 | 873.58 ± 1.82 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 0 | tg128 | 50.15 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log index f12456c..1fda496 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 958.47 ± 2.31 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.29 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 966.53 ± 1.48 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 49.81 ± 0.01 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log index 6162d50..19d7053 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1317.02 ± 4.04 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 53.59 ± 0.07 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1325.29 ± 2.18 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 53.69 ± 0.09 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index 85db8fc..cf2840a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1380.42 ± 7.77 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 52.95 ± 0.07 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1376.09 ± 0.77 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.23 ± 0.06 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log index 979748b..9031f93 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 868.70 ± 8.94 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 54.37 ± 0.04 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | pp512 | 1005.90 ± 1.71 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 0 | tg128 | 54.61 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index d3a0b5a..6572ff7 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 957.23 ± 9.23 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.49 ± 0.04 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1096.08 ± 2.80 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.86 ± 0.02 | -build: f1fbffb5 (6486) +build: a3cb0474 (6735) diff --git a/docs/results.json b/docs/results.json index 5cc4a33..3bb18cc 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,22 +1,22 @@ { "meta": { - "generated_at": "2025-09-28T08:26:03Z", + "generated_at": "2025-10-12T06:31:27Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ + { + "hash": "11f0af55", + "number": "6736" + }, { "hash": "4807e8f9", "number": "6609" }, { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } ], "environments": [ - "rocm6_4_3", - "rocm6_4_3-hblt0", - "rocm6_4_3-rocwmma", - "rocm6_4_3-rocwmma-hblt0", "rocm6_4_4", "rocm6_4_4-hblt0", "rocm6_4_4-rocwmma", @@ -24,8 +24,6 @@ "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", - "rocm7_rc-rocwmma-fa_all_quants", - "rocm7_rc-rocwmma-fa_all_quants-hblt0", "rocm7_rc-rocwmma-hblt0", "vulkan_amdvlk", "vulkan_radv" @@ -33,406 +31,6 @@ "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" }, "runs": [ - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 125.93, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 20.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 135.4, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 20.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 132.28, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 139.86, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 125.92, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 134.12, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 131.45, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 140.67, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", @@ -441,8 +39,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 128.18, - "tps_std": 0.37, + "tps_mean": 121.16, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "ROCm", @@ -454,8 +52,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -466,7 +64,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 20.51, + "tps_mean": 20.46, "tps_std": 0.0, "error": false, "error_type": null, @@ -479,8 +77,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -491,8 +89,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 134.92, - "tps_std": 0.21, + "tps_mean": 134.2, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "ROCm", @@ -504,8 +102,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -516,6 +114,106 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", + "tps_mean": 21.04, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 135.87, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.46, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 171.53, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", "tps_mean": 21.08, "tps_std": 0.0, "error": false, @@ -527,22 +225,22 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 159.31, - "tps_std": 0.83, + "tps_mean": 126.3, + "tps_std": 0.14, "error": false, "error_type": null, "backend": "ROCm", @@ -552,7 +250,7 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -561,12 +259,112 @@ { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.34, + "tps_mean": 19.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 136.0, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 160.81, + "tps_std": 0.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.41, "tps_std": 0.01, "error": false, "error_type": null, @@ -577,7 +375,7 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -586,13 +384,13 @@ { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 171.67, - "tps_std": 0.36, + "tps_mean": 132.96, + "tps_std": 0.49, "error": false, "error_type": null, "backend": "ROCm", @@ -602,7 +400,7 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -611,9 +409,409 @@ { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 20.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 102.61, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 20.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 67.08, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 21.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 117.71, + "tps_std": 0.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 148.21, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 21.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 119.33, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.19, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 93.03, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 21.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 136.81, + "tps_std": 0.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 20.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 148.95, + "tps_std": 0.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": true, "test": "tg128", "tps_mean": 21.04, @@ -627,874 +825,74 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 128.02, - "tps_std": 0.3, + "tps_mean": 216.84, + "tps_std": 0.52, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 20.53, + "tps_mean": 24.15, "tps_std": 0.01, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 110.47, "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 136.15, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 21.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 160.41, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.5, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 161.32, - "tps_std": 0.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 21.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 94.56, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 19.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 127.25, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 20.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 128.69, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 169.19, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 94.71, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 20.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 126.97, + "tps_mean": 218.68, "tps_std": 0.54, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 20.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 160.39, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 169.35, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 94.73, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 93.27, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 159.89, - "tps_std": 0.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 20.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 170.42, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 20.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 217.22, - "tps_std": 0.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 24.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 219.61, - "tps_std": 0.55, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -1504,8 +902,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1516,7 +914,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 24.21, + "tps_mean": 24.27, "tps_std": 0.01, "error": false, "error_type": null, @@ -1529,8 +927,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1541,8 +939,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 212.6, - "tps_std": 0.74, + "tps_mean": 223.39, + "tps_std": 1.25, "error": false, "error_type": null, "backend": "Vulkan", @@ -1554,8 +952,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1566,7 +964,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 24.18, + "tps_mean": 24.06, "tps_std": 0.03, "error": false, "error_type": null, @@ -1579,8 +977,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1591,8 +989,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 224.85, - "tps_std": 2.55, + "tps_mean": 236.02, + "tps_std": 2.6, "error": false, "error_type": null, "backend": "Vulkan", @@ -1604,8 +1002,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1616,7 +1014,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 24.64, + "tps_mean": 24.51, "tps_std": 0.01, "error": false, "error_type": null, @@ -1629,324 +1027,8 @@ "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 120.87, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 15.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 128.65, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 15.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 120.53, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 129.22, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 15.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 128.68, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 15.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -1957,8 +1039,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 123.24, - "tps_std": 0.42, + "tps_mean": 118.41, + "tps_std": 0.2, "error": false, "error_type": null, "backend": "ROCm", @@ -1970,8 +1052,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -1982,832 +1064,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 15.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 129.37, - "tps_std": 0.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 16.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 151.03, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 155.49, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 16.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 122.48, - "tps_std": 0.34, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.86, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 130.06, - "tps_std": 0.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 16.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 150.67, - "tps_std": 0.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.84, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 149.93, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 16.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 91.95, - "tps_std": 0.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 15.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 70.0, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 15.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 134.22, - "tps_std": 0.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 159.75, - "tps_std": 0.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 15.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 92.18, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 15.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 121.75, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 15.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 151.32, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 15.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 161.1, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 15.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 92.2, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 15.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 71.02, + "tps_mean": 15.75, "tps_std": 0.16, "error": false, "error_type": null, @@ -2818,10 +1075,635 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 128.51, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 16.13, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 133.48, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.77, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 143.55, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 16.17, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 121.76, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 129.77, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 16.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 140.68, + "tps_std": 0.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.84, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 134.52, + "tps_std": 0.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 16.08, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 97.09, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 15.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 69.91, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 16.13, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 128.74, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 15.77, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 133.32, + "tps_std": 0.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 16.1, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 91.95, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 15.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 125.81, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2832,7 +1714,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.96, + "tps_mean": 16.2, "tps_std": 0.0, "error": false, "error_type": null, @@ -2845,8 +1727,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2857,8 +1739,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 147.32, - "tps_std": 0.43, + "tps_mean": 142.12, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "ROCm", @@ -2870,8 +1752,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2882,7 +1764,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 15.91, + "tps_mean": 15.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -2895,8 +1777,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2907,8 +1789,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 161.37, - "tps_std": 0.36, + "tps_mean": 139.6, + "tps_std": 0.48, "error": false, "error_type": null, "backend": "ROCm", @@ -2920,8 +1802,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2932,8 +1814,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 15.99, - "tps_std": 0.0, + "tps_mean": 16.1, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -2945,8 +1827,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2957,8 +1839,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 264.5, - "tps_std": 0.99, + "tps_mean": 260.51, + "tps_std": 1.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -2970,8 +1852,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -2982,8 +1864,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.27, - "tps_std": 0.0, + "tps_mean": 17.26, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -2995,8 +1877,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3007,8 +1889,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 267.86, - "tps_std": 1.22, + "tps_mean": 262.18, + "tps_std": 1.19, "error": false, "error_type": null, "backend": "Vulkan", @@ -3020,8 +1902,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3032,8 +1914,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 17.28, - "tps_std": 0.0, + "tps_mean": 17.3, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3045,8 +1927,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3057,8 +1939,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 208.01, - "tps_std": 0.73, + "tps_mean": 222.31, + "tps_std": 0.71, "error": false, "error_type": null, "backend": "Vulkan", @@ -3070,8 +1952,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3082,8 +1964,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.49, - "tps_std": 0.02, + "tps_mean": 17.43, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3095,8 +1977,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3107,8 +1989,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 221.63, - "tps_std": 1.26, + "tps_mean": 233.21, + "tps_std": 6.28, "error": false, "error_type": null, "backend": "Vulkan", @@ -3120,8 +2002,8 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -3132,7 +2014,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 17.71, + "tps_mean": 17.65, "tps_std": 0.01, "error": false, "error_type": null, @@ -3145,270 +2027,10 @@ "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 101.82, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 94.79, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 104.62, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": null - }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -3417,8 +2039,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 98.87, - "tps_std": 0.18, + "tps_mean": 98.0, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -3430,8 +2052,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -3443,7 +2065,7 @@ "fa": false, "test": "tg128", "tps_mean": 2.77, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -3455,8 +2077,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -3467,8 +2089,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 104.31, - "tps_std": 0.07, + "tps_mean": 103.65, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -3480,8 +2102,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -3505,8 +2127,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -3517,8 +2139,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 97.43, - "tps_std": 0.23, + "tps_mean": 98.82, + "tps_std": 0.24, "error": false, "error_type": null, "backend": "ROCm", @@ -3530,8 +2152,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -3542,6 +2164,106 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 102.51, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 97.85, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", "tps_mean": 2.76, "tps_std": 0.0, "error": false, @@ -3553,7 +2275,7 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -3562,12 +2284,212 @@ { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 103.81, + "tps_mean": 102.47, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 98.12, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 104.23, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 92.92, + "tps_std": 8.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 103.05, "tps_std": 0.09, "error": false, "error_type": null, @@ -3578,510 +2500,10 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 99.32, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 104.93, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 98.99, - "tps_std": 0.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 103.03, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 98.15, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 2.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 102.79, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 93.89, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 97.53, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 97.42, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 101.56, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4093,206 +2515,6 @@ "fa": true, "test": "tg128", "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 92.02, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 97.1, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 2.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 95.12, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 2.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 103.16, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 2.78, "tps_std": 0.01, "error": false, "error_type": null, @@ -4303,10 +2525,210 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 93.96, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 97.51, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 95.55, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 2.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 104.25, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4317,8 +2739,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 93.86, - "tps_std": 0.18, + "tps_mean": 93.72, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -4330,8 +2752,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4355,8 +2777,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4367,8 +2789,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 95.87, - "tps_std": 0.08, + "tps_mean": 97.17, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -4380,8 +2802,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4392,7 +2814,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 2.78, + "tps_mean": 2.79, "tps_std": 0.0, "error": false, "error_type": null, @@ -4405,8 +2827,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4417,8 +2839,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 97.72, - "tps_std": 0.36, + "tps_mean": 97.22, + "tps_std": 0.38, "error": false, "error_type": null, "backend": "Vulkan", @@ -4430,8 +2852,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4455,8 +2877,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4467,8 +2889,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 99.04, - "tps_std": 0.31, + "tps_mean": 98.46, + "tps_std": 0.54, "error": false, "error_type": null, "backend": "Vulkan", @@ -4480,8 +2902,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4505,8 +2927,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4517,8 +2939,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 78.94, - "tps_std": 0.51, + "tps_mean": 83.82, + "tps_std": 1.56, "error": false, "error_type": null, "backend": "Vulkan", @@ -4530,8 +2952,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4555,8 +2977,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4567,8 +2989,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 80.9, - "tps_std": 0.77, + "tps_mean": 86.06, + "tps_std": 1.83, "error": false, "error_type": null, "backend": "Vulkan", @@ -4580,8 +3002,8 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -4605,270 +3027,10 @@ "quant": "Q8_K_XL", "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 265.76, - "tps_std": 0.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 289.14, - "tps_std": 1.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.64, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 291.67, - "tps_std": 0.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": null - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -4877,8 +3039,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 276.88, - "tps_std": 1.57, + "tps_mean": 264.81, + "tps_std": 10.73, "error": false, "error_type": null, "backend": "ROCm", @@ -4890,8 +3052,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -4902,8 +3064,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 14.66, - "tps_std": 0.0, + "tps_mean": 14.68, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4915,8 +3077,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -4927,8 +3089,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 292.47, - "tps_std": 1.18, + "tps_mean": 282.95, + "tps_std": 5.18, "error": false, "error_type": null, "backend": "ROCm", @@ -4940,8 +3102,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -4952,8 +3114,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 14.83, - "tps_std": 0.0, + "tps_mean": 14.77, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -4965,8 +3127,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -4977,8 +3139,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 277.79, - "tps_std": 0.94, + "tps_mean": 278.22, + "tps_std": 1.12, "error": false, "error_type": null, "backend": "ROCm", @@ -4990,8 +3152,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -5002,8 +3164,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 14.65, - "tps_std": 0.0, + "tps_mean": 14.67, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -5015,8 +3177,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -5026,9 +3188,31 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, "test": "pp512", - "tps_mean": 292.17, - "tps_std": 1.61, + "tps_mean": 269.91, + "tps_std": 1.51, "error": false, "error_type": null, "backend": "ROCm", @@ -5038,32 +3222,7 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -5076,8 +3235,305 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": false, + "test": "tg128", + "tps_mean": 14.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, "test": "pp512", - "tps_mean": 276.97, + "tps_mean": 288.79, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.79, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 276.43, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.69, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": null + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 270.82, + "tps_std": 1.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.66, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 288.92, + "tps_std": 3.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 275.26, + "tps_std": 1.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.66, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 285.69, "tps_std": 1.15, "error": false, "error_type": null, @@ -5088,535 +3544,10 @@ "file_size_gib": 82.35, "name_params_b": 107.77, "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 293.79, - "tps_std": 2.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 278.59, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 296.61, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 276.44, - "tps_std": 1.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 14.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 292.67, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 273.88, - "tps_std": 1.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 284.81, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 274.13, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 292.92, - "tps_std": 2.63, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 273.23, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5626,20 +3557,23 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "tg128", + "tps_mean": 14.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": null + "build": { + "hash": "a3cb0474", + "number": "6735" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", @@ -5652,7 +3586,7 @@ "tps_mean": null, "tps_std": null, "error": true, - "error_type": "runtime", + "error_type": "hang", "backend": null, "ngl": null, "mmap": null, @@ -5670,121 +3604,121 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, + "test": "pp512", + "tps_mean": 291.9, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.78, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 271.87, + "tps_std": 1.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 82.35, + "name_params_b": 107.77, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 17.0, + "quant": "Q6_K", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 274.52, - "tps_std": 1.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 287.04, - "tps_std": 1.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", @@ -5793,8 +3727,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 224.02, - "tps_std": 2.86, + "tps_mean": 224.42, + "tps_std": 3.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -5806,8 +3740,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5818,8 +3752,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.98, - "tps_std": 0.0, + "tps_mean": 15.99, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -5831,8 +3765,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5843,8 +3777,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 234.3, - "tps_std": 1.1, + "tps_mean": 224.57, + "tps_std": 3.64, "error": false, "error_type": null, "backend": "Vulkan", @@ -5856,8 +3790,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5868,8 +3802,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.75, - "tps_std": 0.0, + "tps_mean": 15.76, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -5881,8 +3815,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5893,8 +3827,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 201.49, - "tps_std": 2.22, + "tps_mean": 206.64, + "tps_std": 2.56, "error": false, "error_type": null, "backend": "Vulkan", @@ -5906,8 +3840,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5918,7 +3852,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.77, + "tps_mean": 15.81, "tps_std": 0.01, "error": false, "error_type": null, @@ -5931,8 +3865,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5943,8 +3877,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 202.49, - "tps_std": 5.98, + "tps_mean": 212.38, + "tps_std": 2.39, "error": false, "error_type": null, "backend": "Vulkan", @@ -5956,8 +3890,8 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -5968,8 +3902,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.74, - "tps_std": 0.0, + "tps_mean": 15.76, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -5981,298 +3915,10 @@ "quant": "Q6_K", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 264.44, - "tps_std": 24.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 11.88, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 298.83, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 11.89, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 274.49, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 298.07, - "tps_std": 2.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 11.89, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": null - }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", @@ -6281,8 +3927,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 281.33, - "tps_std": 2.6, + "tps_mean": 275.0, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "ROCm", @@ -6294,8 +3940,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -6307,7 +3953,7 @@ "fa": false, "test": "tg128", "tps_mean": 11.89, - "tps_std": 0.0, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -6319,8 +3965,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -6331,8 +3977,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 297.14, - "tps_std": 1.58, + "tps_mean": 293.68, + "tps_std": 3.72, "error": false, "error_type": null, "backend": "ROCm", @@ -6344,8 +3990,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -6356,757 +4002,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 12.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 280.36, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 298.12, - "tps_std": 2.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 12.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 279.89, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 297.68, - "tps_std": 2.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 11.97, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 284.44, - "tps_std": 3.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.9, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 300.04, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 12.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 275.21, - "tps_std": 1.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 11.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 292.69, - "tps_std": 2.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 11.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 267.51, - "tps_std": 12.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.9, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 286.25, - "tps_std": 4.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.9, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 260.6, - "tps_std": 10.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 11.82, - "tps_std": 0.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 293.26, - "tps_std": 3.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 11.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 275.91, - "tps_std": 1.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 11.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 260.83, - "tps_std": 5.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 11.82, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 279.56, - "tps_std": 3.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 11.88, + "tps_mean": 11.96, "tps_std": 0.02, "error": false, "error_type": null, @@ -7117,44 +4013,22 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 278.61, - "tps_std": 2.47, + "tps_mean": 260.53, + "tps_std": 23.26, "error": false, "error_type": null, "backend": "ROCm", @@ -7164,19 +4038,319 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.82, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 295.09, + "tps_std": 2.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 276.63, + "tps_std": 1.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 11.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 299.51, + "tps_std": 2.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 11.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": false, + "test": "pp512", + "tps_mean": 283.0, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 293.35, + "tps_std": 11.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.94, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 272.39, + "tps_std": 2.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, "test": "tg128", "tps_mean": 11.92, "tps_std": 0.0, @@ -7189,10 +4363,310 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 296.04, + "tps_std": 2.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 11.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 266.07, + "tps_std": 22.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 257.0, + "tps_std": 4.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 11.76, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 263.12, + "tps_std": 18.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 11.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 284.19, + "tps_std": 24.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 11.89, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 273.01, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 11.85, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7225,8 +4699,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 343.36, - "tps_std": 1.37, + "tps_mean": 346.53, + "tps_std": 1.71, "error": false, "error_type": null, "backend": "Vulkan", @@ -7238,8 +4712,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7263,8 +4737,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7275,8 +4749,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 347.56, - "tps_std": 1.15, + "tps_mean": 346.93, + "tps_std": 1.5, "error": false, "error_type": null, "backend": "Vulkan", @@ -7288,8 +4762,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7300,7 +4774,57 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.42, + "tps_mean": 12.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 272.53, + "tps_std": 1.82, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 106.65, + "name_params_b": 107.77, + "quant": "Q8_0", + "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 12.58, "tps_std": 0.01, "error": false, "error_type": null, @@ -7311,60 +4835,10 @@ "file_size_gib": 106.65, "name_params_b": 107.77, "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 244.52, - "tps_std": 1.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 12.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7375,8 +4849,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 253.13, - "tps_std": 1.36, + "tps_mean": 280.38, + "tps_std": 1.48, "error": false, "error_type": null, "backend": "Vulkan", @@ -7388,8 +4862,8 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -7400,7 +4874,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 12.56, + "tps_mean": 12.58, "tps_std": 0.0, "error": false, "error_type": null, @@ -7413,22 +4887,122 @@ "quant": "Q8_0", "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": false, + "test": "pp512", + "tps_mean": 283.37, + "tps_std": 1.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 17.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 305.77, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, "test": null, "tps_mean": null, "tps_std": null, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": null, "ngl": null, "mmap": null, @@ -7436,509 +5010,9 @@ "file_size_gib": null, "name_params_b": 17.0, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": null }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 307.79, - "tps_std": 3.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 138.51, - "tps_std": 0.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 142.41, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 282.5, - "tps_std": 1.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 306.89, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 137.98, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 143.18, - "tps_std": 0.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 291.19, - "tps_std": 2.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 307.71, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 18.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 291.96, - "tps_std": 2.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", @@ -7947,8 +5021,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 310.84, - "tps_std": 1.35, + "tps_mean": 306.78, + "tps_std": 2.0, "error": false, "error_type": null, "backend": "ROCm", @@ -7960,8 +5034,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -7972,7 +5046,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 18.01, + "tps_mean": 17.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -7985,8 +5059,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -7997,8 +5071,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 291.26, - "tps_std": 0.79, + "tps_mean": 285.54, + "tps_std": 1.17, "error": false, "error_type": null, "backend": "ROCm", @@ -8022,7 +5096,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.83, + "tps_mean": 17.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -8047,8 +5121,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 311.26, - "tps_std": 1.06, + "tps_mean": 310.82, + "tps_std": 2.23, "error": false, "error_type": null, "backend": "ROCm", @@ -8072,6 +5146,406 @@ "env_variant": null, "fa": true, "test": "tg128", + "tps_mean": 17.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 295.23, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 312.09, + "tps_std": 1.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 281.11, + "tps_std": 2.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 17.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 305.36, + "tps_std": 1.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 17.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 284.75, + "tps_std": 2.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 17.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 298.01, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 17.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 281.91, + "tps_std": 2.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 306.77, + "tps_std": 2.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", "tps_mean": 17.97, "tps_std": 0.0, "error": false, @@ -8083,555 +5557,11 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 290.78, - "tps_std": 1.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 310.36, - "tps_std": 1.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 18.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 283.86, - "tps_std": 1.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 17.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 299.13, - "tps_std": 2.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 17.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 286.66, - "tps_std": 1.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 300.0, - "tps_std": 1.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 284.38, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 306.4, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 17.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 286.08, - "tps_std": 2.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 17.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 297.71, - "tps_std": 1.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 17.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log", - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "build": null + "build": { + "hash": "a3cb0474", + "number": "6735" + } }, { "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", @@ -8641,8 +5571,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 284.17, - "tps_std": 2.14, + "tps_mean": 284.71, + "tps_std": 1.24, "error": false, "error_type": null, "backend": "ROCm", @@ -8654,8 +5584,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8666,7 +5596,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 17.8, + "tps_mean": 17.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -8679,8 +5609,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8691,8 +5621,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 300.96, - "tps_std": 1.85, + "tps_mean": 299.68, + "tps_std": 1.75, "error": false, "error_type": null, "backend": "ROCm", @@ -8704,8 +5634,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8716,7 +5646,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 17.81, + "tps_mean": 17.93, "tps_std": 0.0, "error": false, "error_type": null, @@ -8729,8 +5659,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8741,8 +5671,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 191.71, - "tps_std": 1.02, + "tps_mean": 193.74, + "tps_std": 0.96, "error": false, "error_type": null, "backend": "Vulkan", @@ -8754,8 +5684,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8766,7 +5696,107 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 21.03, + "tps_mean": 21.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 194.33, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 20.64, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 223.8, + "tps_std": 2.7, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 107.77, + "file_size_gib": 57.73, + "name_params_b": 107.77, + "quant": "Q4_K_XL", + "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 20.91, "tps_std": 0.02, "error": false, "error_type": null, @@ -8777,110 +5807,10 @@ "file_size_gib": 57.73, "name_params_b": 107.77, "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 193.39, - "tps_std": 1.52, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 20.61, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 213.71, - "tps_std": 2.99, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 20.87, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8891,8 +5821,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 217.08, - "tps_std": 5.59, + "tps_mean": 228.13, + "tps_std": 3.26, "error": false, "error_type": null, "backend": "Vulkan", @@ -8904,8 +5834,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -8916,8 +5846,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 20.85, - "tps_std": 0.01, + "tps_mean": 20.88, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -8929,380 +5859,8 @@ "quant": "Q4_K_XL", "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 129.22, - "tps_std": 0.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 143.48, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 75.22, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.29, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 129.64, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 144.82, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 74.17, - "tps_std": 0.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.27, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 77.91, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -9313,8 +5871,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 134.57, - "tps_std": 0.66, + "tps_mean": 131.74, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "ROCm", @@ -9326,8 +5884,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -9338,306 +5896,6 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 14.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 144.38, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 134.69, - "tps_std": 1.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 143.45, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 133.5, - "tps_std": 0.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 144.31, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 133.54, - "tps_std": 0.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", "tps_mean": 14.54, "tps_std": 0.0, "error": false, @@ -9649,6 +5907,306 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 142.3, + "tps_std": 0.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.9, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 134.45, + "tps_std": 0.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.58, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 141.69, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.95, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 132.25, + "tps_std": 0.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 144.16, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.87, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 133.5, + "tps_std": 0.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.55, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0.log", "build": { "hash": "4807e8f9", @@ -9663,8 +6221,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 144.26, - "tps_std": 0.29, + "tps_mean": 143.26, + "tps_std": 0.97, "error": false, "error_type": null, "backend": "ROCm", @@ -9688,8 +6246,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 14.92, - "tps_std": 0.0, + "tps_mean": 14.87, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -9705,206 +6263,6 @@ "number": "6609" } }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 131.98, - "tps_std": 0.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 14.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 141.08, - "tps_std": 0.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 14.29, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 130.87, - "tps_std": 0.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.25, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 137.23, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.32, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", @@ -9913,207 +6271,7 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 132.6, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.33, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 143.76, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 134.24, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.32, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 141.84, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 134.45, + "tps_mean": 132.68, "tps_std": 0.5, "error": false, "error_type": null, @@ -10124,10 +6282,210 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.62, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 142.6, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.9, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 133.6, + "tps_std": 0.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.59, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 140.15, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.93, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 133.39, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10138,7 +6496,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 14.32, + "tps_mean": 14.62, "tps_std": 0.0, "error": false, "error_type": null, @@ -10151,8 +6509,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10163,8 +6521,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 145.01, - "tps_std": 0.84, + "tps_mean": 146.88, + "tps_std": 0.69, "error": false, "error_type": null, "backend": "ROCm", @@ -10176,8 +6534,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10188,157 +6546,210 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 14.36, + "tps_mean": 15.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 134.05, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 144.44, + "tps_std": 0.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 136.12, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 17.32, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 135.43, + "tps_std": 4.81, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 17.14, "tps_std": 0.02, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 133.4, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.32, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 235.0, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 131.33, - "tps_std": 1.43, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 17.27, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 133.32, - "tps_std": 1.63, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -10348,33 +6759,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 17.12, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10385,8 +6771,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 115.77, - "tps_std": 1.42, + "tps_mean": 120.72, + "tps_std": 3.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -10398,8 +6784,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10410,7 +6796,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 17.75, + "tps_mean": 17.74, "tps_std": 0.01, "error": false, "error_type": null, @@ -10423,8 +6809,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10435,8 +6821,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 121.8, - "tps_std": 1.81, + "tps_mean": 125.48, + "tps_std": 4.53, "error": false, "error_type": null, "backend": "Vulkan", @@ -10448,8 +6834,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10460,8 +6846,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 18.1, - "tps_std": 0.0, + "tps_mean": 18.02, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -10473,408 +6859,8 @@ "quant": "Q3_K_XL", "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 426.32, - "tps_std": 6.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 25.0, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 479.22, - "tps_std": 4.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 158.56, - "tps_std": 4.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 165.57, - "tps_std": 2.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 432.19, - "tps_std": 6.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 25.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 477.24, - "tps_std": 5.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 162.44, - "tps_std": 4.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 160.17, - "tps_std": 3.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.91, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -10885,8 +6871,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 451.6, - "tps_std": 1.8, + "tps_mean": 436.29, + "tps_std": 4.51, "error": false, "error_type": null, "backend": "ROCm", @@ -10898,8 +6884,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -10923,8 +6909,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -10935,8 +6921,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 482.09, - "tps_std": 5.55, + "tps_mean": 480.95, + "tps_std": 4.32, "error": false, "error_type": null, "backend": "ROCm", @@ -10948,8 +6934,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -10960,7 +6946,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 25.77, + "tps_mean": 25.82, "tps_std": 0.0, "error": false, "error_type": null, @@ -10973,8 +6959,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -10985,8 +6971,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 345.46, - "tps_std": 3.07, + "tps_mean": 346.51, + "tps_std": 4.73, "error": false, "error_type": null, "backend": "ROCm", @@ -10998,8 +6984,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -11023,8 +7009,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -11035,8 +7021,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 354.93, - "tps_std": 5.65, + "tps_mean": 356.62, + "tps_std": 6.87, "error": false, "error_type": null, "backend": "ROCm", @@ -11048,8 +7034,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -11060,6 +7046,606 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", + "tps_mean": 25.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 435.7, + "tps_std": 6.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 24.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 485.65, + "tps_std": 7.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 25.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 343.63, + "tps_std": 2.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 364.38, + "tps_std": 4.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 25.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 435.87, + "tps_std": 4.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 25.56, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 477.05, + "tps_std": 5.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 25.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 338.71, + "tps_std": 3.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 347.28, + "tps_std": 5.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 25.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 439.13, + "tps_std": 4.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 25.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 482.81, + "tps_std": 7.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 25.85, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 334.68, + "tps_std": 2.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 25.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 346.28, + "tps_std": 2.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", "tps_mean": 25.8, "tps_std": 0.0, "error": false, @@ -11071,810 +7657,10 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 448.97, - "tps_std": 7.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 25.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 489.49, - "tps_std": 3.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 25.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 343.78, - "tps_std": 1.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.48, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 363.09, - "tps_std": 8.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 25.75, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 435.53, - "tps_std": 2.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 24.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 476.36, - "tps_std": 3.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 24.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 330.47, - "tps_std": 5.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 343.19, - "tps_std": 4.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 431.59, - "tps_std": 5.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 25.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 476.09, - "tps_std": 5.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 24.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 332.32, - "tps_std": 3.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.11, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 344.55, - "tps_std": 3.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 431.29, - "tps_std": 3.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 25.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 475.35, - "tps_std": 3.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 24.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 329.24, - "tps_std": 2.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 25.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 348.53, - "tps_std": 5.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 24.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -11885,8 +7671,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 139.51, - "tps_std": 0.9, + "tps_mean": 216.27, + "tps_std": 0.39, "error": false, "error_type": null, "backend": "Vulkan", @@ -11898,8 +7684,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -11910,7 +7696,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 8.31, + "tps_mean": 10.07, "tps_std": 0.01, "error": false, "error_type": null, @@ -11923,8 +7709,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -11935,8 +7721,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 140.62, - "tps_std": 1.53, + "tps_mean": 216.46, + "tps_std": 0.31, "error": false, "error_type": null, "backend": "Vulkan", @@ -11948,8 +7734,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -11960,8 +7746,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 8.26, - "tps_std": 0.0, + "tps_mean": 10.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -11973,8 +7759,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -11985,8 +7771,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 150.84, - "tps_std": 1.38, + "tps_mean": 163.35, + "tps_std": 0.2, "error": false, "error_type": null, "backend": "Vulkan", @@ -11998,8 +7784,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -12010,8 +7796,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 8.24, - "tps_std": 0.0, + "tps_mean": 9.24, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -12023,8 +7809,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -12035,8 +7821,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 156.53, - "tps_std": 2.33, + "tps_mean": 166.05, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "Vulkan", @@ -12048,8 +7834,8 @@ "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -12060,421 +7846,21 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 8.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 538.66, - "tps_std": 2.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 53.01, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 620.78, - "tps_std": 3.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 52.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 389.41, - "tps_std": 1.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 53.13, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 413.64, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 52.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 543.05, - "tps_std": 4.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 53.14, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 624.71, - "tps_std": 4.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 52.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 387.73, - "tps_std": 2.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 53.3, + "tps_mean": 9.29, "tps_std": 0.02, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 30.53, - "file_size_gib": 24.53, + "file_size_gib": 56.89, "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log", + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 415.19, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 52.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -12485,8 +7871,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 577.98, - "tps_std": 6.34, + "tps_mean": 562.46, + "tps_std": 5.25, "error": false, "error_type": null, "backend": "ROCm", @@ -12498,8 +7884,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12510,7 +7896,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 55.37, + "tps_mean": 55.16, "tps_std": 0.01, "error": false, "error_type": null, @@ -12523,8 +7909,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12535,8 +7921,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 623.53, - "tps_std": 3.7, + "tps_mean": 626.72, + "tps_std": 6.27, "error": false, "error_type": null, "backend": "ROCm", @@ -12548,8 +7934,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12560,7 +7946,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 56.76, + "tps_mean": 57.04, "tps_std": 0.01, "error": false, "error_type": null, @@ -12573,8 +7959,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12585,8 +7971,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 582.34, - "tps_std": 4.27, + "tps_mean": 589.82, + "tps_std": 5.37, "error": false, "error_type": null, "backend": "ROCm", @@ -12598,8 +7984,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12610,8 +7996,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 55.34, - "tps_std": 0.02, + "tps_mean": 55.38, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -12623,8 +8009,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12635,8 +8021,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 622.32, - "tps_std": 5.83, + "tps_mean": 620.07, + "tps_std": 8.69, "error": false, "error_type": null, "backend": "ROCm", @@ -12648,8 +8034,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12660,7 +8046,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 56.82, + "tps_mean": 56.88, "tps_std": 0.01, "error": false, "error_type": null, @@ -12673,8 +8059,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -12685,8 +8071,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 582.99, - "tps_std": 4.97, + "tps_mean": 556.95, + "tps_std": 4.88, "error": false, "error_type": null, "backend": "ROCm", @@ -12710,8 +8096,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 55.33, - "tps_std": 0.02, + "tps_mean": 54.93, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12735,8 +8121,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 632.12, - "tps_std": 3.63, + "tps_mean": 632.67, + "tps_std": 5.74, "error": false, "error_type": null, "backend": "ROCm", @@ -12760,8 +8146,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 56.73, - "tps_std": 0.0, + "tps_mean": 56.81, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -12785,8 +8171,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 582.14, - "tps_std": 4.21, + "tps_mean": 582.53, + "tps_std": 3.05, "error": false, "error_type": null, "backend": "ROCm", @@ -12810,7 +8196,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 55.39, + "tps_mean": 55.41, "tps_std": 0.01, "error": false, "error_type": null, @@ -12835,8 +8221,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 632.63, - "tps_std": 4.35, + "tps_mean": 629.19, + "tps_std": 4.25, "error": false, "error_type": null, "backend": "ROCm", @@ -12860,7 +8246,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 56.77, + "tps_mean": 56.94, "tps_std": 0.01, "error": false, "error_type": null, @@ -12877,206 +8263,6 @@ "number": "6609" } }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 552.48, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 53.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 618.51, - "tps_std": 8.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 52.82, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 576.87, - "tps_std": 7.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 53.42, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 609.51, - "tps_std": 4.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 52.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", @@ -13085,8 +8271,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 555.3, - "tps_std": 3.11, + "tps_mean": 557.13, + "tps_std": 5.01, "error": false, "error_type": null, "backend": "ROCm", @@ -13098,8 +8284,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13110,8 +8296,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 53.34, - "tps_std": 0.0, + "tps_mean": 55.6, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -13123,8 +8309,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13135,8 +8321,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 618.71, - "tps_std": 2.77, + "tps_mean": 623.1, + "tps_std": 4.22, "error": false, "error_type": null, "backend": "ROCm", @@ -13148,8 +8334,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13160,107 +8346,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 52.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 575.05, - "tps_std": 4.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 53.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 618.89, - "tps_std": 4.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 52.69, + "tps_mean": 56.95, "tps_std": 0.01, "error": false, "error_type": null, @@ -13271,22 +8357,22 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 549.65, - "tps_std": 6.16, + "tps_mean": 586.15, + "tps_std": 3.69, "error": false, "error_type": null, "backend": "ROCm", @@ -13296,21 +8382,21 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": null, + "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 53.42, + "tps_mean": 55.49, "tps_std": 0.02, "error": false, "error_type": null, @@ -13321,10 +8407,110 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 619.92, + "tps_std": 6.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 57.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 553.07, + "tps_std": 3.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 55.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13335,8 +8521,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 621.8, - "tps_std": 7.09, + "tps_mean": 632.51, + "tps_std": 3.87, "error": false, "error_type": null, "backend": "ROCm", @@ -13348,8 +8534,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13360,7 +8546,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 52.78, + "tps_mean": 56.97, "tps_std": 0.01, "error": false, "error_type": null, @@ -13373,8 +8559,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13385,8 +8571,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 575.05, - "tps_std": 3.02, + "tps_mean": 588.1, + "tps_std": 4.51, "error": false, "error_type": null, "backend": "ROCm", @@ -13398,8 +8584,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13410,7 +8596,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 53.42, + "tps_mean": 55.49, "tps_std": 0.01, "error": false, "error_type": null, @@ -13423,8 +8609,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13435,8 +8621,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 614.05, - "tps_std": 4.83, + "tps_mean": 631.26, + "tps_std": 5.65, "error": false, "error_type": null, "backend": "ROCm", @@ -13448,8 +8634,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13460,7 +8646,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 52.83, + "tps_mean": 56.97, "tps_std": 0.01, "error": false, "error_type": null, @@ -13473,8 +8659,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13485,8 +8671,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1027.23, - "tps_std": 5.64, + "tps_mean": 1053.02, + "tps_std": 7.31, "error": false, "error_type": null, "backend": "Vulkan", @@ -13498,8 +8684,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13510,8 +8696,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 63.42, - "tps_std": 0.03, + "tps_mean": 63.84, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -13523,8 +8709,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13535,8 +8721,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1005.86, - "tps_std": 4.35, + "tps_mean": 1020.41, + "tps_std": 5.76, "error": false, "error_type": null, "backend": "Vulkan", @@ -13548,8 +8734,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13560,7 +8746,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 59.12, + "tps_mean": 59.42, "tps_std": 0.04, "error": false, "error_type": null, @@ -13573,8 +8759,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13585,8 +8771,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 764.63, - "tps_std": 3.75, + "tps_mean": 823.6, + "tps_std": 3.91, "error": false, "error_type": null, "backend": "Vulkan", @@ -13598,8 +8784,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13610,8 +8796,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 64.77, - "tps_std": 0.1, + "tps_mean": 64.74, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "Vulkan", @@ -13623,8 +8809,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13635,8 +8821,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 801.22, - "tps_std": 4.04, + "tps_mean": 857.47, + "tps_std": 4.38, "error": false, "error_type": null, "backend": "Vulkan", @@ -13648,8 +8834,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -13660,8 +8846,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 63.44, - "tps_std": 0.12, + "tps_mean": 63.41, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "Vulkan", @@ -13673,408 +8859,8 @@ "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 726.41, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 822.38, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 219.78, - "tps_std": 3.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 227.29, - "tps_std": 2.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 703.97, - "tps_std": 0.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 803.68, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 222.73, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 220.75, - "tps_std": 2.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -14085,8 +8871,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 754.71, - "tps_std": 0.79, + "tps_mean": 747.69, + "tps_std": 1.06, "error": false, "error_type": null, "backend": "ROCm", @@ -14098,8 +8884,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -14110,7 +8896,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 14.16, + "tps_mean": 14.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -14123,8 +8909,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -14135,307 +8921,7 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 803.95, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 14.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 768.26, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 814.89, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 14.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 751.85, - "tps_std": 1.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 814.18, - "tps_std": 1.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 769.51, - "tps_std": 0.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 824.93, + "tps_mean": 809.2, "tps_std": 0.75, "error": false, "error_type": null, @@ -14446,6 +8932,306 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 781.56, + "tps_std": 1.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 819.61, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 720.89, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 815.58, + "tps_std": 1.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 772.99, + "tps_std": 2.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 824.58, + "tps_std": 1.69, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "build": { "hash": "4807e8f9", @@ -14460,7 +9246,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 14.08, + "tps_mean": 14.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -14480,13 +9266,13 @@ { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 714.52, - "tps_std": 1.47, + "tps_mean": 734.9, + "tps_std": 0.79, "error": false, "error_type": null, "backend": "ROCm", @@ -14496,18 +9282,218 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 14.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 799.43, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 14.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 762.49, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 14.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 788.46, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 14.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 734.16, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, "fa": false, "test": "tg128", "tps_mean": 14.16, @@ -14521,410 +9507,10 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 810.36, - "tps_std": 1.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 13.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 752.18, - "tps_std": 0.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 797.91, - "tps_std": 0.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 738.56, - "tps_std": 1.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 810.24, - "tps_std": 2.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 13.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 751.87, - "tps_std": 1.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 798.06, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 738.5, - "tps_std": 1.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -14935,8 +9521,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 801.53, - "tps_std": 1.48, + "tps_mean": 816.86, + "tps_std": 0.8, "error": false, "error_type": null, "backend": "ROCm", @@ -14948,8 +9534,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -14960,7 +9546,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 13.88, + "tps_mean": 14.13, "tps_std": 0.0, "error": false, "error_type": null, @@ -14973,8 +9559,8 @@ "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -14985,696 +9571,296 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 751.81, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 14.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 791.04, - "tps_std": 2.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 13.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 679.86, - "tps_std": 1.33, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.6, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 659.67, - "tps_std": 0.72, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 14.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 504.31, - "tps_std": 3.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 14.14, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 501.78, - "tps_std": 2.8, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 13.95, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 413.72, - "tps_std": 0.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 469.46, + "tps_mean": 763.42, "tps_std": 1.37, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 4.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 84.71, - "tps_std": 8.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 84.12, - "tps_std": 9.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 408.4, - "tps_std": 1.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 470.49, - "tps_std": 1.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 84.93, - "tps_std": 8.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 4.1, + "tps_mean": 14.17, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log", + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 83.22, - "tps_std": 10.78, + "tps_mean": 806.7, + "tps_std": 1.39, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.1, + "tps_mean": 14.12, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log", + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 668.85, + "tps_std": 1.34, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 648.34, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 14.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 541.39, + "tps_std": 3.33, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 14.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 532.11, + "tps_std": 3.15, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 13.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -15685,8 +9871,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 425.33, - "tps_std": 1.61, + "tps_mean": 420.14, + "tps_std": 0.69, "error": false, "error_type": null, "backend": "ROCm", @@ -15698,8 +9884,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15710,7 +9896,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 4.11, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -15723,8 +9909,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15735,8 +9921,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 470.8, - "tps_std": 1.97, + "tps_mean": 468.87, + "tps_std": 0.84, "error": false, "error_type": null, "backend": "ROCm", @@ -15748,8 +9934,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15760,7 +9946,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 4.1, + "tps_mean": 4.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -15773,8 +9959,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15785,7 +9971,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 469.59, + "tps_mean": 477.22, "tps_std": 0.76, "error": false, "error_type": null, @@ -15798,8 +9984,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15810,7 +9996,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -15823,8 +10009,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15835,8 +10021,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 524.38, - "tps_std": 0.7, + "tps_mean": 524.62, + "tps_std": 0.55, "error": false, "error_type": null, "backend": "ROCm", @@ -15848,8 +10034,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15860,7 +10046,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.1, + "tps_mean": 4.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -15873,8 +10059,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -15885,8 +10071,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 418.14, - "tps_std": 0.79, + "tps_mean": 413.24, + "tps_std": 0.72, "error": false, "error_type": null, "backend": "ROCm", @@ -15935,8 +10121,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 472.28, - "tps_std": 1.24, + "tps_mean": 471.95, + "tps_std": 1.68, "error": false, "error_type": null, "backend": "ROCm", @@ -15985,8 +10171,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 471.56, - "tps_std": 0.6, + "tps_mean": 471.17, + "tps_std": 0.97, "error": false, "error_type": null, "backend": "ROCm", @@ -16035,8 +10221,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 530.58, - "tps_std": 0.66, + "tps_mean": 529.49, + "tps_std": 1.2, "error": false, "error_type": null, "backend": "ROCm", @@ -16060,7 +10246,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 4.11, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -16077,206 +10263,6 @@ "number": "6609" } }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 412.86, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 465.55, - "tps_std": 1.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 453.66, - "tps_std": 0.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 498.77, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -16285,8 +10271,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 419.05, - "tps_std": 0.86, + "tps_mean": 421.4, + "tps_std": 0.43, "error": false, "error_type": null, "backend": "ROCm", @@ -16298,8 +10284,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -16310,7 +10296,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 4.09, + "tps_mean": 4.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -16323,8 +10309,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -16335,8 +10321,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 466.36, - "tps_std": 1.34, + "tps_mean": 464.58, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "ROCm", @@ -16348,8 +10334,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -16360,206 +10346,6 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 451.57, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 499.87, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 422.0, - "tps_std": 0.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 4.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 469.45, - "tps_std": 1.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", "tps_mean": 4.11, "tps_std": 0.0, "error": false, @@ -16571,71 +10357,21 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 453.24, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 4.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 502.26, + "tps_mean": 458.08, "tps_std": 0.81, "error": false, "error_type": null, @@ -16646,19 +10382,19 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc-hblt0", + "env": "rocm7_rc-rocwmma-hblt0", "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, + "env_variant": "rocwmma-hblt0", + "fa": false, "test": "tg128", "tps_mean": 4.1, "tps_std": 0.0, @@ -16671,10 +10407,260 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 499.11, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 413.95, + "tps_std": 0.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 4.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 469.08, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 4.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 457.65, + "tps_std": 0.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 4.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 503.26, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 4.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -16729,8 +10715,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 134.52, - "tps_std": 0.99, + "tps_mean": 103.58, + "tps_std": 1.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -16742,8 +10728,8 @@ "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -16754,56 +10740,6 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 3.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 138.59, - "tps_std": 1.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", "tps_mean": 3.93, "tps_std": 0.0, "error": false, @@ -16815,410 +10751,60 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 107.33, + "tps_std": 1.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1886.62, - "tps_std": 6.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 76.36, - "tps_std": 5.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 2282.08, - "tps_std": 7.86, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 72.4, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 713.12, - "tps_std": 38.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.01, - "tps_std": 5.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 676.8, - "tps_std": 75.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 59.1, - "tps_std": 2.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1857.54, - "tps_std": 7.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 75.34, - "tps_std": 7.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 2214.91, - "tps_std": 7.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 72.36, - "tps_std": 0.05, + "tps_mean": 3.91, + "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log", + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 656.82, - "tps_std": 60.97, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 63.81, - "tps_std": 3.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 663.36, - "tps_std": 79.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 58.63, - "tps_std": 2.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -17229,8 +10815,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 2110.44, - "tps_std": 6.13, + "tps_mean": 1935.04, + "tps_std": 3.89, "error": false, "error_type": null, "backend": "ROCm", @@ -17242,8 +10828,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -17254,8 +10840,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 79.31, - "tps_std": 0.03, + "tps_mean": 79.17, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -17267,8 +10853,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -17279,8 +10865,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 2261.02, - "tps_std": 8.46, + "tps_mean": 2278.78, + "tps_std": 8.79, "error": false, "error_type": null, "backend": "ROCm", @@ -17292,8 +10878,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -17304,8 +10890,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 77.07, - "tps_std": 0.04, + "tps_mean": 76.94, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -17317,8 +10903,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -17329,8 +10915,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 2040.3, - "tps_std": 9.11, + "tps_mean": 2072.56, + "tps_std": 8.2, "error": false, "error_type": null, "backend": "ROCm", @@ -17342,8 +10928,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -17354,57 +10940,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 79.33, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2143.83, - "tps_std": 3.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 77.19, + "tps_mean": 79.03, "tps_std": 0.02, "error": false, "error_type": null, @@ -17415,10 +10951,60 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2158.84, + "tps_std": 4.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 77.11, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" } }, { @@ -17429,8 +11015,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 2099.8, - "tps_std": 6.34, + "tps_mean": 1896.32, + "tps_std": 6.0, "error": false, "error_type": null, "backend": "ROCm", @@ -17454,8 +11040,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 79.43, - "tps_std": 0.05, + "tps_mean": 79.32, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -17479,8 +11065,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 2262.0, - "tps_std": 6.48, + "tps_mean": 2261.52, + "tps_std": 12.45, "error": false, "error_type": null, "backend": "ROCm", @@ -17504,8 +11090,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 77.04, - "tps_std": 0.03, + "tps_mean": 77.18, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -17529,8 +11115,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 2038.14, - "tps_std": 6.72, + "tps_mean": 2038.57, + "tps_std": 4.97, "error": false, "error_type": null, "backend": "ROCm", @@ -17554,8 +11140,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 79.41, - "tps_std": 0.04, + "tps_mean": 79.42, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -17579,8 +11165,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 2141.85, - "tps_std": 6.83, + "tps_mean": 2127.98, + "tps_std": 4.53, "error": false, "error_type": null, "backend": "ROCm", @@ -17604,8 +11190,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 77.14, - "tps_std": 0.02, + "tps_mean": 77.17, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -17621,206 +11207,6 @@ "number": "6609" } }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 1830.34, - "tps_std": 15.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 60.04, - "tps_std": 4.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 2178.17, - "tps_std": 91.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 55.78, - "tps_std": 3.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2016.93, - "tps_std": 4.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 58.29, - "tps_std": 3.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2094.58, - "tps_std": 12.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 66.23, - "tps_std": 8.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", @@ -17829,8 +11215,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1865.95, - "tps_std": 7.12, + "tps_mean": 1858.93, + "tps_std": 11.45, "error": false, "error_type": null, "backend": "ROCm", @@ -17842,8 +11228,8 @@ "quant": "Q3_K_S", "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -17854,943 +11240,571 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 79.56, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 2252.6, - "tps_std": 11.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 72.57, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2011.51, - "tps_std": 6.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 79.65, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2111.65, - "tps_std": 7.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 67.62, - "tps_std": 4.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1869.83, - "tps_std": 5.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 79.48, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 2229.43, - "tps_std": 7.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 72.58, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 2014.48, - "tps_std": 4.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 79.61, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 2064.91, - "tps_std": 7.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 72.45, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1288.81, - "tps_std": 206.13, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 86.61, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1149.64, - "tps_std": 181.24, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 85.5, - "tps_std": 1.74, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 967.51, - "tps_std": 123.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 86.74, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 991.94, - "tps_std": 120.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 85.61, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 629.19, - "tps_std": 3.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 34.79, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 780.88, - "tps_std": 9.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 34.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 364.08, - "tps_std": 1.11, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 400.84, - "tps_std": 0.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 34.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 627.57, - "tps_std": 4.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 34.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 783.4, - "tps_std": 1.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 34.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log", - "build": null - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 402.16, - "tps_std": 1.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 34.16, + "tps_mean": 79.18, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 2249.97, + "tps_std": 8.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 77.23, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 2045.87, + "tps_std": 7.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 79.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2110.98, + "tps_std": 11.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 77.03, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1865.48, + "tps_std": 5.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 79.26, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 2265.97, + "tps_std": 12.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 77.23, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 2034.18, + "tps_std": 7.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 79.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 2104.47, + "tps_std": 6.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 77.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1417.85, + "tps_std": 229.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 85.91, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1193.42, + "tps_std": 154.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 82.87, + "tps_std": 1.37, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1256.94, + "tps_std": 209.42, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 92.19, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1150.84, + "tps_std": 174.29, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 85.89, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -18801,8 +11815,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 683.95, - "tps_std": 7.54, + "tps_mean": 667.15, + "tps_std": 5.65, "error": false, "error_type": null, "backend": "ROCm", @@ -18814,8 +11828,8 @@ "quant": "F16", "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -18826,906 +11840,6 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 34.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 783.37, - "tps_std": 6.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 35.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 689.85, - "tps_std": 4.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 789.94, - "tps_std": 5.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 35.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 682.09, - "tps_std": 3.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 34.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 790.76, - "tps_std": 6.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 35.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 688.37, - "tps_std": 4.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 777.75, - "tps_std": 25.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 35.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 650.02, - "tps_std": 4.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 34.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 778.25, - "tps_std": 3.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 34.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 688.7, - "tps_std": 7.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 780.39, - "tps_std": 6.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 34.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 653.89, - "tps_std": 3.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 34.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 770.19, - "tps_std": 5.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 34.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 691.27, - "tps_std": 4.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 34.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 772.44, - "tps_std": 6.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 34.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 653.09, - "tps_std": 7.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 34.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 779.77, - "tps_std": 4.78, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 34.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 683.9, - "tps_std": 5.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", "tps_mean": 34.77, "tps_std": 0.0, "error": false, @@ -19737,22 +11851,22 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 777.37, - "tps_std": 4.77, + "tps_mean": 786.49, + "tps_std": 4.02, "error": false, "error_type": null, "backend": "ROCm", @@ -19762,21 +11876,21 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 34.17, + "tps_mean": 35.16, "tps_std": 0.0, "error": false, "error_type": null, @@ -19787,346 +11901,46 @@ "file_size_gib": 60.87, "name_params_b": 116.83, "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { "model": "gpt-oss-120b-F16", "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 626.37, - "tps_std": 2.34, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 35.23, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 723.8, - "tps_std": 2.49, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 34.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 401.61, - "tps_std": 1.76, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 33.86, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 444.61, - "tps_std": 1.65, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 33.84, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 612.55, - "tps_std": 6.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 47.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 766.08, - "tps_std": 2.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 45.93, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 362.01, - "tps_std": 1.06, + "tps_mean": 700.13, + "tps_std": 3.54, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 47.04, + "tps_mean": 34.79, "tps_std": 0.0, "error": false, "error_type": null, @@ -20134,22 +11948,522 @@ "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "11f0af55", + "number": "6736" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "test": "pp512", + "tps_mean": 783.5, + "tps_std": 5.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 35.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 650.45, + "tps_std": 2.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 32.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 790.9, + "tps_std": 4.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 34.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 687.78, + "tps_std": 5.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 34.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 792.0, + "tps_std": 9.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 35.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 660.37, + "tps_std": 3.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 34.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 770.55, + "tps_std": 4.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 35.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 698.86, + "tps_std": 6.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 34.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 777.48, + "tps_std": 7.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 35.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 666.29, + "tps_std": 5.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 34.8, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, "test": null, "tps_mean": null, "tps_std": null, @@ -20161,44 +12475,44 @@ "params_b": null, "file_size_gib": null, "name_params_b": null, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", "build": null }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 614.68, - "tps_std": 3.32, + "tps_mean": 702.07, + "tps_std": 4.76, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 47.04, + "tps_mean": 34.78, "tps_std": 0.01, "error": false, "error_type": null, @@ -20206,163 +12520,263 @@ "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 768.28, - "tps_std": 5.81, + "tps_mean": 760.87, + "tps_std": 22.7, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 45.86, - "tps_std": 0.0, + "tps_mean": 35.07, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 362.06, + "tps_mean": 627.11, "tps_std": 1.45, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 47.11, - "tps_std": 0.0, + "tps_mean": 35.32, + "tps_std": 0.02, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 397.06, - "tps_std": 1.41, + "tps_mean": 719.39, + "tps_std": 2.63, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 46.01, + "tps_mean": 34.71, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 433.14, + "tps_std": 1.74, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 33.99, "tps_std": 0.01, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 116.83, - "file_size_gib": 59.02, + "file_size_gib": 60.87, "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log", + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 481.71, + "tps_std": 2.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-F16", + "model_clean": "gpt-oss-120b-F16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 34.46, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 60.87, + "name_params_b": 116.83, + "quant": "F16", + "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -20373,8 +12787,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 668.07, - "tps_std": 3.99, + "tps_mean": 653.32, + "tps_std": 7.07, "error": false, "error_type": null, "backend": "ROCm", @@ -20386,8 +12800,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20398,7 +12812,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 47.22, + "tps_mean": 47.09, "tps_std": 0.01, "error": false, "error_type": null, @@ -20411,8 +12825,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20423,8 +12837,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 767.63, - "tps_std": 5.37, + "tps_mean": 767.28, + "tps_std": 2.81, "error": false, "error_type": null, "backend": "ROCm", @@ -20436,8 +12850,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20448,7 +12862,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 47.72, + "tps_mean": 47.63, "tps_std": 0.01, "error": false, "error_type": null, @@ -20461,8 +12875,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20473,8 +12887,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 685.61, - "tps_std": 4.6, + "tps_mean": 703.72, + "tps_std": 4.21, "error": false, "error_type": null, "backend": "ROCm", @@ -20486,8 +12900,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20498,8 +12912,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 47.15, - "tps_std": 0.0, + "tps_mean": 47.05, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -20511,8 +12925,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20523,8 +12937,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 785.43, - "tps_std": 4.63, + "tps_mean": 773.91, + "tps_std": 4.34, "error": false, "error_type": null, "backend": "ROCm", @@ -20536,8 +12950,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -20548,6 +12962,606 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", + "tps_mean": 47.61, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 644.73, + "tps_std": 4.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 46.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 766.09, + "tps_std": 8.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 47.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 660.34, + "tps_std": 48.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 46.72, + "tps_std": 0.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 780.39, + "tps_std": 3.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 47.7, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 651.94, + "tps_std": 3.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 47.17, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 756.58, + "tps_std": 4.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 47.62, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 700.53, + "tps_std": 1.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.17, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 772.03, + "tps_std": 9.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 47.64, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 650.2, + "tps_std": 4.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 47.07, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 755.62, + "tps_std": 4.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 47.7, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 698.26, + "tps_std": 2.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 47.05, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 773.2, + "tps_std": 7.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "test": "tg128", "tps_mean": 47.65, "tps_std": 0.01, "error": false, @@ -20559,810 +13573,10 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 664.62, - "tps_std": 3.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 47.11, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 773.25, - "tps_std": 6.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 47.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 686.92, - "tps_std": 5.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 781.6, - "tps_std": 6.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 47.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 639.82, - "tps_std": 2.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 46.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 756.98, - "tps_std": 1.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 46.0, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 683.94, - "tps_std": 2.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 778.15, - "tps_std": 4.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 46.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 641.91, - "tps_std": 7.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 47.2, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 756.17, - "tps_std": 4.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 46.05, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 681.37, - "tps_std": 3.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.19, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 770.6, - "tps_std": 3.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 46.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 641.87, - "tps_std": 3.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 47.17, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 757.39, - "tps_std": 3.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 46.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 688.94, - "tps_std": 3.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 47.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 769.31, - "tps_std": 5.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 46.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21373,8 +13587,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 682.6, - "tps_std": 3.3, + "tps_mean": 681.25, + "tps_std": 3.69, "error": false, "error_type": null, "backend": "Vulkan", @@ -21386,8 +13600,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21398,8 +13612,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 51.41, - "tps_std": 0.01, + "tps_mean": 51.65, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -21411,8 +13625,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21423,8 +13637,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 790.49, - "tps_std": 4.84, + "tps_mean": 788.46, + "tps_std": 4.36, "error": false, "error_type": null, "backend": "Vulkan", @@ -21436,8 +13650,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21448,8 +13662,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 50.15, - "tps_std": 0.01, + "tps_mean": 50.32, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -21461,8 +13675,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21473,8 +13687,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 426.15, - "tps_std": 2.65, + "tps_mean": 464.26, + "tps_std": 2.62, "error": false, "error_type": null, "backend": "Vulkan", @@ -21486,8 +13700,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21498,7 +13712,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 52.79, + "tps_mean": 52.85, "tps_std": 0.16, "error": false, "error_type": null, @@ -21511,8 +13725,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21523,8 +13737,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 478.69, - "tps_std": 3.26, + "tps_mean": 526.13, + "tps_std": 3.2, "error": false, "error_type": null, "backend": "Vulkan", @@ -21536,8 +13750,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21548,8 +13762,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 52.75, - "tps_std": 0.06, + "tps_mean": 52.9, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "Vulkan", @@ -21561,408 +13775,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1140.4, - "tps_std": 8.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 27.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1492.3, - "tps_std": 22.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 26.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 327.64, - "tps_std": 1.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 342.77, - "tps_std": 3.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 27.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1147.38, - "tps_std": 6.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 27.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1508.59, - "tps_std": 26.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 27.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 326.33, - "tps_std": 6.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 344.41, - "tps_std": 7.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 26.96, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -21973,8 +13787,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1253.42, - "tps_std": 6.47, + "tps_mean": 1230.17, + "tps_std": 12.16, "error": false, "error_type": null, "backend": "ROCm", @@ -21986,8 +13800,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -21998,7 +13812,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 27.29, + "tps_mean": 27.22, "tps_std": 0.0, "error": false, "error_type": null, @@ -22011,8 +13825,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22023,8 +13837,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1502.41, - "tps_std": 9.99, + "tps_mean": 1493.11, + "tps_std": 16.19, "error": false, "error_type": null, "backend": "ROCm", @@ -22036,8 +13850,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22048,7 +13862,7 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 27.35, + "tps_mean": 27.3, "tps_std": 0.0, "error": false, "error_type": null, @@ -22061,8 +13875,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22073,8 +13887,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 1234.38, - "tps_std": 12.52, + "tps_mean": 1274.89, + "tps_std": 11.66, "error": false, "error_type": null, "backend": "ROCm", @@ -22086,8 +13900,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22098,7 +13912,7 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.25, + "tps_mean": 27.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -22111,8 +13925,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22123,8 +13937,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 1463.75, - "tps_std": 8.49, + "tps_mean": 1460.62, + "tps_std": 17.09, "error": false, "error_type": null, "backend": "ROCm", @@ -22136,8 +13950,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22148,7 +13962,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 27.34, + "tps_mean": 27.32, "tps_std": 0.0, "error": false, "error_type": null, @@ -22161,8 +13975,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -22173,8 +13987,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1258.74, - "tps_std": 12.44, + "tps_mean": 1163.88, + "tps_std": 56.1, "error": false, "error_type": null, "backend": "ROCm", @@ -22198,7 +14012,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 27.27, + "tps_mean": 25.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -22223,8 +14037,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1513.34, - "tps_std": 10.79, + "tps_mean": 1508.43, + "tps_std": 11.78, "error": false, "error_type": null, "backend": "ROCm", @@ -22248,7 +14062,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 27.35, + "tps_mean": 27.36, "tps_std": 0.0, "error": false, "error_type": null, @@ -22273,8 +14087,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 1235.02, - "tps_std": 7.1, + "tps_mean": 1238.64, + "tps_std": 11.98, "error": false, "error_type": null, "backend": "ROCm", @@ -22323,8 +14137,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1475.65, - "tps_std": 12.28, + "tps_mean": 1492.62, + "tps_std": 19.37, "error": false, "error_type": null, "backend": "ROCm", @@ -22348,6 +14162,206 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", + "tps_mean": 27.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "pp512", + "tps_mean": 1220.88, + "tps_std": 18.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": false, + "test": "tg128", + "tps_mean": 27.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1470.86, + "tps_std": 14.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 27.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1252.31, + "tps_std": 14.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 27.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1421.53, + "tps_std": 7.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 38.97, + "name_params_b": 20.91, + "quant": "F32", + "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-F32", + "model_clean": "gpt-oss-20b-F32", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", "tps_mean": 27.32, "tps_std": 0.0, "error": false, @@ -22359,410 +14373,10 @@ "file_size_gib": 38.97, "name_params_b": 20.91, "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 1202.41, - "tps_std": 13.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 26.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 1484.6, - "tps_std": 5.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 26.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1225.63, - "tps_std": 9.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.25, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1421.82, - "tps_std": 12.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 26.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1209.21, - "tps_std": 16.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 27.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1489.0, - "tps_std": 6.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 26.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1218.32, - "tps_std": 13.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 27.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1424.6, - "tps_std": 8.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 26.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22773,8 +14387,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1198.99, - "tps_std": 21.23, + "tps_mean": 1228.62, + "tps_std": 4.47, "error": false, "error_type": null, "backend": "ROCm", @@ -22786,8 +14400,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22798,7 +14412,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 27.25, + "tps_mean": 27.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -22811,8 +14425,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22823,8 +14437,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1506.46, - "tps_std": 15.83, + "tps_mean": 1492.83, + "tps_std": 17.46, "error": false, "error_type": null, "backend": "ROCm", @@ -22836,8 +14450,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22848,7 +14462,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 26.98, + "tps_mean": 27.29, "tps_std": 0.0, "error": false, "error_type": null, @@ -22861,8 +14475,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22873,8 +14487,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 1224.83, - "tps_std": 11.58, + "tps_mean": 1263.37, + "tps_std": 8.5, "error": false, "error_type": null, "backend": "ROCm", @@ -22886,8 +14500,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22898,7 +14512,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 27.2, + "tps_mean": 27.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -22911,8 +14525,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22923,8 +14537,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1446.22, - "tps_std": 17.28, + "tps_mean": 1426.1, + "tps_std": 25.91, "error": false, "error_type": null, "backend": "ROCm", @@ -22936,8 +14550,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22948,7 +14562,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 26.99, + "tps_mean": 27.35, "tps_std": 0.0, "error": false, "error_type": null, @@ -22961,8 +14575,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22973,8 +14587,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 469.06, - "tps_std": 6.82, + "tps_mean": 566.88, + "tps_std": 3.31, "error": false, "error_type": null, "backend": "Vulkan", @@ -22986,8 +14600,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -22998,7 +14612,7 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 15.25, + "tps_mean": 18.39, "tps_std": 0.01, "error": false, "error_type": null, @@ -23011,8 +14625,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23023,8 +14637,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 514.17, - "tps_std": 6.35, + "tps_mean": 609.37, + "tps_std": 2.58, "error": false, "error_type": null, "backend": "Vulkan", @@ -23036,8 +14650,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23048,7 +14662,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 15.13, + "tps_mean": 18.25, "tps_std": 0.01, "error": false, "error_type": null, @@ -23061,8 +14675,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23073,8 +14687,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 537.62, - "tps_std": 2.1, + "tps_mean": 423.31, + "tps_std": 2.25, "error": false, "error_type": null, "backend": "Vulkan", @@ -23086,8 +14700,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23098,8 +14712,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 14.85, - "tps_std": 0.01, + "tps_mean": 16.82, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -23111,8 +14725,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23123,8 +14737,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 583.32, - "tps_std": 3.38, + "tps_mean": 451.11, + "tps_std": 2.96, "error": false, "error_type": null, "backend": "Vulkan", @@ -23136,8 +14750,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23148,7 +14762,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 14.86, + "tps_mean": 16.83, "tps_std": 0.01, "error": false, "error_type": null, @@ -23161,408 +14775,8 @@ "quant": "F32", "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 1164.39, - "tps_std": 11.24, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 67.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1531.44, - "tps_std": 9.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 65.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 590.66, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 653.8, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 65.72, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1160.12, - "tps_std": 12.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 67.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1539.79, - "tps_std": 14.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 65.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 591.28, - "tps_std": 2.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 655.1, - "tps_std": 1.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 65.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -23573,8 +14787,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1276.57, - "tps_std": 15.26, + "tps_mean": 1247.4, + "tps_std": 5.38, "error": false, "error_type": null, "backend": "ROCm", @@ -23586,8 +14800,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -23598,8 +14812,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 67.47, - "tps_std": 0.0, + "tps_mean": 67.13, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -23611,8 +14825,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -23623,8 +14837,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1520.24, - "tps_std": 18.05, + "tps_mean": 1529.26, + "tps_std": 3.68, "error": false, "error_type": null, "backend": "ROCm", @@ -23636,8 +14850,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -23648,107 +14862,107 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", + "tps_mean": 68.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1375.72, + "tps_std": 12.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 67.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1585.34, + "tps_std": 7.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", "tps_mean": 68.08, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1335.36, - "tps_std": 7.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.28, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1575.76, - "tps_std": 15.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 68.18, "tps_std": 0.02, "error": false, "error_type": null, @@ -23761,8 +14975,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -23773,8 +14987,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1270.02, - "tps_std": 3.61, + "tps_mean": 1220.54, + "tps_std": 7.38, "error": false, "error_type": null, "backend": "ROCm", @@ -23798,8 +15012,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 67.37, - "tps_std": 0.01, + "tps_mean": 67.22, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -23823,8 +15037,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1533.65, - "tps_std": 17.58, + "tps_mean": 1534.52, + "tps_std": 6.57, "error": false, "error_type": null, "backend": "ROCm", @@ -23848,8 +15062,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 68.13, - "tps_std": 0.01, + "tps_mean": 68.16, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -23873,8 +15087,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 1337.89, - "tps_std": 14.39, + "tps_mean": 1329.52, + "tps_std": 7.97, "error": false, "error_type": null, "backend": "ROCm", @@ -23898,7 +15112,7 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 67.39, + "tps_mean": 67.2, "tps_std": 0.01, "error": false, "error_type": null, @@ -23923,8 +15137,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1587.21, - "tps_std": 12.01, + "tps_mean": 1601.11, + "tps_std": 22.8, "error": false, "error_type": null, "backend": "ROCm", @@ -23948,8 +15162,8 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 68.25, - "tps_std": 0.01, + "tps_mean": 68.21, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -23965,206 +15179,6 @@ "number": "6609" } }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "pp512", - "tps_mean": 1222.12, - "tps_std": 10.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": false, - "test": "tg128", - "tps_mean": 67.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 1515.09, - "tps_std": 6.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 65.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1335.14, - "tps_std": 17.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1562.66, - "tps_std": 9.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 65.84, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -24173,8 +15187,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 1215.59, - "tps_std": 8.93, + "tps_mean": 1239.97, + "tps_std": 8.69, "error": false, "error_type": null, "backend": "ROCm", @@ -24186,8 +15200,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -24198,8 +15212,8 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 67.39, - "tps_std": 0.0, + "tps_mean": 67.1, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -24211,8 +15225,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -24223,8 +15237,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1521.41, - "tps_std": 10.84, + "tps_mean": 1506.28, + "tps_std": 15.62, "error": false, "error_type": null, "backend": "ROCm", @@ -24236,8 +15250,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -24248,8 +15262,8 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", - "tps_mean": 65.89, - "tps_std": 0.01, + "tps_mean": 67.98, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -24261,8 +15275,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -24273,8 +15287,8 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "pp512", - "tps_mean": 1334.89, - "tps_std": 9.58, + "tps_mean": 1381.33, + "tps_std": 11.13, "error": false, "error_type": null, "backend": "ROCm", @@ -24286,8 +15300,8 @@ "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -24298,871 +15312,471 @@ "env_variant": "rocwmma-hblt0", "fa": false, "test": "tg128", - "tps_mean": 67.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1567.58, - "tps_std": 12.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 65.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1219.34, - "tps_std": 5.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 67.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1530.7, - "tps_std": 9.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 65.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 1331.0, - "tps_std": 21.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 67.41, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1575.63, - "tps_std": 16.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 65.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1498.39, - "tps_std": 12.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 74.08, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1914.72, - "tps_std": 22.77, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 72.57, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 1002.66, - "tps_std": 7.71, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": false, - "test": "tg128", - "tps_mean": 74.77, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1204.49, - "tps_std": 13.52, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 74.94, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 979.46, - "tps_std": 1.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 49.9, + "tps_mean": 67.13, "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1100.15, - "tps_std": 1.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 49.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 348.31, - "tps_std": 0.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 366.05, - "tps_std": 1.98, + "tps_mean": 1563.47, + "tps_std": 11.28, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-rocwmma-hblt0", - "env_base": "rocm6_4_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 49.32, + "tps_mean": 67.91, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1239.41, + "tps_std": 5.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 67.1, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3-rocwmma__hblt0__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", "env_variant": null, - "fa": false, + "fa": true, "test": "pp512", - "tps_mean": 979.46, - "tps_std": 2.1, + "tps_mean": 1508.59, + "tps_std": 7.75, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 67.92, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 1368.12, + "tps_std": 12.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 49.9, + "tps_mean": 67.09, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1105.32, - "tps_std": 2.28, + "tps_mean": 1566.75, + "tps_std": 13.55, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3", - "env_base": "rocm6_4_3", - "env_variant": null, + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 49.24, + "tps_mean": 67.99, "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 348.63, - "tps_std": 0.64, + "tps_mean": 1515.08, + "tps_std": 10.36, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 49.81, - "tps_std": 0.0, + "tps_mean": 74.59, + "tps_std": 0.07, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 368.28, - "tps_std": 0.81, + "tps_mean": 1908.57, + "tps_std": 17.12, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_3-hblt0", - "env_base": "rocm6_4_3", - "env_variant": "hblt0", + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 49.35, - "tps_std": 0.01, + "tps_mean": 72.91, + "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_3__hblt0__fa1.log", + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 1097.23, + "tps_std": 7.32, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 74.95, + "tps_std": 0.4, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1340.77, + "tps_std": 10.85, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "test": "tg128", + "tps_mean": 75.19, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" } }, { @@ -25173,8 +15787,8 @@ "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 979.59, - "tps_std": 0.72, + "tps_mean": 985.32, + "tps_std": 1.37, "error": false, "error_type": null, "backend": "ROCm", @@ -25186,8 +15800,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -25198,7 +15812,7 @@ "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 49.85, + "tps_mean": 50.21, "tps_std": 0.01, "error": false, "error_type": null, @@ -25211,8 +15825,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -25223,8 +15837,8 @@ "env_variant": "rocwmma", "fa": true, "test": "pp512", - "tps_mean": 1098.0, - "tps_std": 4.05, + "tps_mean": 1101.41, + "tps_std": 1.79, "error": false, "error_type": null, "backend": "ROCm", @@ -25236,8 +15850,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", "build": { - "hash": "4807e8f9", - "number": "6609" + "hash": "11f0af55", + "number": "6736" } }, { @@ -25248,6 +15862,206 @@ "env_variant": "rocwmma", "fa": true, "test": "tg128", + "tps_mean": 49.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 910.75, + "tps_std": 2.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.19, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "pp512", + "tps_mean": 1009.78, + "tps_std": 2.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "test": "tg128", + "tps_mean": 49.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", + "build": { + "hash": "11f0af55", + "number": "6736" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "pp512", + "tps_mean": 980.87, + "tps_std": 2.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": false, + "test": "tg128", + "tps_mean": 49.86, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "pp512", + "tps_mean": 1113.8, + "tps_std": 2.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "build": { + "hash": "4807e8f9", + "number": "6609" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "test": "tg128", "tps_mean": 49.4, "tps_std": 0.01, "error": false, @@ -25259,7 +16073,7 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -25268,13 +16082,13 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 899.84, - "tps_std": 2.29, + "tps_mean": 896.7, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", @@ -25284,7 +16098,7 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log", "build": { "hash": "4807e8f9", "number": "6609" @@ -25293,109 +16107,9 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 1005.78, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.37, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": false, - "test": "pp512", - "tps_mean": 979.86, - "tps_std": 1.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env_variant": "hblt0", "fa": false, "test": "tg128", "tps_mean": 49.87, @@ -25409,106 +16123,6 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "pp512", - "tps_mean": 1117.04, - "tps_std": 3.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "test": "tg128", - "tps_mean": 49.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 895.65, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log", - "build": { - "hash": "4807e8f9", - "number": "6609" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0.log", "build": { "hash": "4807e8f9", @@ -25523,8 +16137,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 1020.22, - "tps_std": 1.63, + "tps_mean": 1017.14, + "tps_std": 1.96, "error": false, "error_type": null, "backend": "ROCm", @@ -25548,7 +16162,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 49.36, + "tps_mean": 49.4, "tps_std": 0.01, "error": false, "error_type": null, @@ -25568,13 +16182,13 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", + "env_variant": "rocwmma", "fa": false, "test": "pp512", - "tps_mean": 977.63, - "tps_std": 2.98, + "tps_mean": 990.88, + "tps_std": 3.03, "error": false, "error_type": null, "backend": "ROCm", @@ -25584,21 +16198,121 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", + "env": "rocm7_rc-rocwmma", "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", + "env_variant": "rocwmma", "fa": false, "test": "tg128", - "tps_mean": 49.91, + "tps_mean": 50.2, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "pp512", + "tps_mean": 1092.96, + "tps_std": 3.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "test": "tg128", + "tps_mean": 49.81, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "pp512", + "tps_mean": 875.3, + "tps_std": 2.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", + "build": { + "hash": "a3cb0474", + "number": "6735" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": false, + "test": "tg128", + "tps_mean": 50.23, "tps_std": 0.02, "error": false, "error_type": null, @@ -25609,310 +16323,10 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "pp512", - "tps_mean": 1097.55, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants", - "fa": true, - "test": "tg128", - "tps_mean": 49.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 860.3, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "pp512", - "tps_mean": 953.79, - "tps_std": 3.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-fa_all_quants-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-fa_all_quants-hblt0", - "fa": true, - "test": "tg128", - "tps_mean": 49.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma-fa_all_quants__hblt0__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "pp512", - "tps_mean": 984.61, - "tps_std": 2.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": false, - "test": "tg128", - "tps_mean": 49.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "pp512", - "tps_mean": 1095.5, - "tps_std": 2.69, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "test": "tg128", - "tps_mean": 49.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "pp512", - "tps_mean": 859.46, - "tps_std": 1.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": false, - "test": "tg128", - "tps_mean": 49.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0.log", - "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -25923,8 +16337,8 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "pp512", - "tps_mean": 952.18, - "tps_std": 1.68, + "tps_mean": 956.75, + "tps_std": 3.28, "error": false, "error_type": null, "backend": "ROCm", @@ -25936,8 +16350,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -25948,7 +16362,7 @@ "env_variant": "rocwmma-hblt0", "fa": true, "test": "tg128", - "tps_mean": 49.32, + "tps_mean": 49.98, "tps_std": 0.01, "error": false, "error_type": null, @@ -25961,8 +16375,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -25973,8 +16387,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 980.24, - "tps_std": 1.4, + "tps_mean": 987.11, + "tps_std": 2.95, "error": false, "error_type": null, "backend": "ROCm", @@ -25986,8 +16400,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -25998,8 +16412,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 49.9, - "tps_std": 0.0, + "tps_mean": 50.23, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -26011,8 +16425,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26023,8 +16437,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1100.05, - "tps_std": 4.01, + "tps_mean": 1107.83, + "tps_std": 1.63, "error": false, "error_type": null, "backend": "ROCm", @@ -26036,8 +16450,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26048,7 +16462,7 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 49.29, + "tps_mean": 49.98, "tps_std": 0.01, "error": false, "error_type": null, @@ -26061,8 +16475,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26073,8 +16487,8 @@ "env_variant": "hblt0", "fa": false, "test": "pp512", - "tps_mean": 860.23, - "tps_std": 0.94, + "tps_mean": 873.58, + "tps_std": 1.82, "error": false, "error_type": null, "backend": "ROCm", @@ -26086,8 +16500,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26098,8 +16512,8 @@ "env_variant": "hblt0", "fa": false, "test": "tg128", - "tps_mean": 49.92, - "tps_std": 0.01, + "tps_mean": 50.15, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -26111,8 +16525,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26123,8 +16537,8 @@ "env_variant": "hblt0", "fa": true, "test": "pp512", - "tps_mean": 958.47, - "tps_std": 2.31, + "tps_mean": 966.53, + "tps_std": 1.48, "error": false, "error_type": null, "backend": "ROCm", @@ -26136,8 +16550,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26148,7 +16562,7 @@ "env_variant": "hblt0", "fa": true, "test": "tg128", - "tps_mean": 49.29, + "tps_mean": 49.81, "tps_std": 0.01, "error": false, "error_type": null, @@ -26161,8 +16575,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26173,8 +16587,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 1317.02, - "tps_std": 4.04, + "tps_mean": 1325.29, + "tps_std": 2.18, "error": false, "error_type": null, "backend": "Vulkan", @@ -26186,8 +16600,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26198,8 +16612,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 53.59, - "tps_std": 0.07, + "tps_mean": 53.69, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -26211,8 +16625,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26223,8 +16637,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 1380.42, - "tps_std": 7.77, + "tps_mean": 1376.09, + "tps_std": 0.77, "error": false, "error_type": null, "backend": "Vulkan", @@ -26236,8 +16650,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26248,8 +16662,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 52.95, - "tps_std": 0.07, + "tps_mean": 53.23, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "Vulkan", @@ -26261,8 +16675,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26273,8 +16687,8 @@ "env_variant": null, "fa": false, "test": "pp512", - "tps_mean": 868.7, - "tps_std": 8.94, + "tps_mean": 1005.9, + "tps_std": 1.71, "error": false, "error_type": null, "backend": "Vulkan", @@ -26286,8 +16700,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26298,8 +16712,8 @@ "env_variant": null, "fa": false, "test": "tg128", - "tps_mean": 54.37, - "tps_std": 0.04, + "tps_mean": 54.61, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -26311,8 +16725,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26323,8 +16737,8 @@ "env_variant": null, "fa": true, "test": "pp512", - "tps_mean": 957.23, - "tps_std": 9.23, + "tps_mean": 1096.08, + "tps_std": 2.8, "error": false, "error_type": null, "backend": "Vulkan", @@ -26336,8 +16750,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } }, { @@ -26348,8 +16762,8 @@ "env_variant": null, "fa": true, "test": "tg128", - "tps_mean": 53.49, - "tps_std": 0.04, + "tps_mean": 53.86, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -26361,8 +16775,8 @@ "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "build": { - "hash": "f1fbffb5", - "number": "6486" + "hash": "a3cb0474", + "number": "6735" } } ]